1
0
mirror of https://github.com/danog/liquid.git synced 2024-11-27 01:44:39 +01:00
liquid/parser/scanner.go

86 lines
2.5 KiB
Go
Raw Permalink Normal View History

2017-07-07 11:41:37 +02:00
package parser
2017-06-25 17:23:20 +02:00
import (
2017-07-21 04:32:06 +02:00
"fmt"
2017-06-25 17:23:20 +02:00
"regexp"
2017-06-30 14:42:11 +02:00
"strings"
2017-06-25 17:23:20 +02:00
)
2017-07-09 17:18:35 +02:00
// Scan breaks a string into a sequence of Tokens.
2017-07-24 04:38:02 +02:00
func Scan(data string, loc SourceLoc, delims []string) (tokens []Token) {
// Apply defaults
if len(delims) != 4 {
delims = []string{"{{", "}}", "{%", "%}"}
2017-07-21 04:32:06 +02:00
}
2017-07-24 04:38:02 +02:00
tokenMatcher := formTokenMatcher(delims)
2017-07-21 04:32:06 +02:00
2017-06-26 18:41:41 +02:00
// TODO error on unterminated {{ and {%
// TODO probably an error when a tag contains a {{ or {%, at least outside of a string
2017-07-16 20:15:11 +02:00
p, pe := 0, len(data)
2017-07-09 17:18:35 +02:00
for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {
2017-06-25 17:23:20 +02:00
ts, te := m[0], m[1]
if p < ts {
2017-07-16 20:15:11 +02:00
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})
loc.LineNo += strings.Count(data[p:ts], "\n")
2017-06-25 17:23:20 +02:00
}
2017-06-30 14:42:11 +02:00
source := data[ts:te]
2017-07-24 04:38:02 +02:00
switch {
case data[ts:ts+len(delims[0])] == delims[0]:
2017-07-16 20:15:11 +02:00
tok := Token{
Type: ObjTokenType,
2017-07-16 20:15:11 +02:00
SourceLoc: loc,
Source: source,
Args: data[m[2]:m[3]],
2017-07-16 20:15:11 +02:00
TrimLeft: source[2] == '-',
TrimRight: source[len(source)-3] == '-',
}
tokens = append(tokens, tok)
2017-07-24 04:38:02 +02:00
case data[ts:ts+len(delims[2])] == delims[2]:
2017-07-16 20:15:11 +02:00
tok := Token{
Type: TagTokenType,
2017-07-16 20:15:11 +02:00
SourceLoc: loc,
Source: source,
Name: data[m[4]:m[5]],
2017-07-16 20:15:11 +02:00
TrimLeft: source[2] == '-',
TrimRight: source[len(source)-3] == '-',
2017-06-29 18:20:16 +02:00
}
2017-06-25 17:23:20 +02:00
if m[6] > 0 {
2017-07-16 20:15:11 +02:00
tok.Args = data[m[6]:m[7]]
2017-06-25 17:23:20 +02:00
}
2017-07-16 20:15:11 +02:00
tokens = append(tokens, tok)
2017-06-25 17:23:20 +02:00
}
2017-07-16 20:15:11 +02:00
loc.LineNo += strings.Count(source, "\n")
2017-06-25 23:26:14 +02:00
p = te
2017-06-25 17:23:20 +02:00
}
if p < pe {
2017-07-16 20:15:11 +02:00
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})
2017-06-25 17:23:20 +02:00
}
2017-07-16 20:15:11 +02:00
return tokens
2017-06-25 17:23:20 +02:00
}
2017-07-24 03:00:05 +02:00
2017-07-24 04:38:02 +02:00
// formTokenMatcher builds the regular expression used to locate objects and
// tags in template source.
//
// delims must hold four delimiters in the order object-left, object-right,
// tag-left, tag-right. In the returned expression capture group 1 is the body
// of an object, group 2 is the name of a tag and group 3 (optional) is the
// tag's arguments. The function panics, via regexp.MustCompile, if the
// resulting pattern is invalid.
func formTokenMatcher(delims []string) *regexp.Regexp {
	// On ending a tag we need to exclude anything that appears to be ending a tag that's nested
	// inside the tag. We form the exclusion expression here.
	// For example, if delims is default the exclusion expression is "[^%]|%[^\}]".
	// If tagRight is "TAG!RIGHT" then expression is
	// [^T]|T[^A]|TA[^G]|TAG[^!]|TAG![^R]|TAG!R[^I]|TAG!RI[^G]|TAG!RIG[^H]|TAG!RIGH[^T]
	tagRight := delims[3]
	exclusion := make([]string, 0, len(tagRight))
	for idx, r := range tagRight {
		// idx is the byte offset of r, so tagRight[:idx] is exactly the text
		// preceding it (also for multi-byte runes). Both the prefix and the
		// excluded rune are escaped so that delimiters containing regular
		// expression metacharacters (e.g. "?>" or "*}") stay literal.
		exclusion = append(exclusion,
			regexp.QuoteMeta(tagRight[:idx])+"[^"+regexp.QuoteMeta(string(r))+"]")
	}
	return regexp.MustCompile(
		fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|%s-?\s*(\w+)(?:\s+((?:%v)+?))?\s*-?%s`,
			// QuoteMeta will escape any of these that are regex commands
			regexp.QuoteMeta(delims[0]), regexp.QuoteMeta(delims[1]),
			regexp.QuoteMeta(delims[2]), strings.Join(exclusion, "|"), regexp.QuoteMeta(delims[3]),
		),
	)
}