mirror of
https://github.com/danog/liquid.git
synced 2024-11-26 23:24:38 +01:00
86 lines
2.5 KiB
Go
86 lines
2.5 KiB
Go
package parser
|
|
|
|
import (
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// Scan breaks a string into a sequence of Tokens.
|
|
func Scan(data string, loc SourceLoc, delims []string) (tokens []Token) {
|
|
|
|
// Apply defaults
|
|
if len(delims) != 4 {
|
|
delims = []string{"{{", "}}", "{%", "%}"}
|
|
}
|
|
tokenMatcher := formTokenMatcher(delims)
|
|
|
|
// TODO error on unterminated {{ and {%
|
|
// TODO probably an error when a tag contains a {{ or {%, at least outside of a string
|
|
p, pe := 0, len(data)
|
|
for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {
|
|
ts, te := m[0], m[1]
|
|
if p < ts {
|
|
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})
|
|
loc.LineNo += strings.Count(data[p:ts], "\n")
|
|
}
|
|
source := data[ts:te]
|
|
switch {
|
|
case data[ts:ts+len(delims[0])] == delims[0]:
|
|
tok := Token{
|
|
Type: ObjTokenType,
|
|
SourceLoc: loc,
|
|
Source: source,
|
|
Args: data[m[2]:m[3]],
|
|
TrimLeft: source[2] == '-',
|
|
TrimRight: source[len(source)-3] == '-',
|
|
}
|
|
tokens = append(tokens, tok)
|
|
case data[ts:ts+len(delims[2])] == delims[2]:
|
|
tok := Token{
|
|
Type: TagTokenType,
|
|
SourceLoc: loc,
|
|
Source: source,
|
|
Name: data[m[4]:m[5]],
|
|
TrimLeft: source[2] == '-',
|
|
TrimRight: source[len(source)-3] == '-',
|
|
}
|
|
if m[6] > 0 {
|
|
tok.Args = data[m[6]:m[7]]
|
|
}
|
|
tokens = append(tokens, tok)
|
|
}
|
|
loc.LineNo += strings.Count(source, "\n")
|
|
p = te
|
|
}
|
|
if p < pe {
|
|
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})
|
|
}
|
|
return tokens
|
|
}
|
|
|
|
// formTokenMatcher compiles the regular expression that recognizes object and
// tag tokens for the given delimiters.
//
// delims must hold four strings in the order object-left, object-right,
// tag-left, tag-right. Every delimiter is matched literally: regular
// expression metacharacters in them are escaped.
//
// The resulting expression has three capture groups: (1) the object
// expression, (2) the tag name, and (3) the optional tag arguments. It panics
// (via regexp.MustCompile) if the assembled expression is invalid.
func formTokenMatcher(delims []string) *regexp.Regexp {
	// On ending a tag we need to exclude anything that appears to be ending a tag that's nested
	// inside the tag. We form the exclusion expression here.
	// For example, if delims is default the exclusion expression is "[^%]|%[^\}]".
	// If tagRight is "TAG!RIGHT" then expression is
	// [^T]|T[^A]|TA[^G]|TAG[^!]|TAG![^R]|TAG!R[^I]|TAG!RI[^G]|TAG!RIG[^H]|TAG!RIGH[^T]
	tagRight := delims[3]
	exclusion := make([]string, 0, len(tagRight))
	for idx, val := range tagRight {
		// idx is the byte offset of val, so tagRight[:idx] is exactly the
		// prefix preceding it even when the delimiter holds multi-byte runes.
		// Both the prefix and the excluded rune are quoted so characters such
		// as '?', '*' or '\' in a delimiter cannot corrupt the expression.
		exclusion = append(exclusion,
			regexp.QuoteMeta(tagRight[:idx])+"[^"+regexp.QuoteMeta(string(val))+"]")
	}

	tokenMatcher := regexp.MustCompile(
		fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|%s-?\s*(\w+)(?:\s+((?:%v)+?))?\s*-?%s`,
			// QuoteMeta will escape any of these that are regex commands
			regexp.QuoteMeta(delims[0]), regexp.QuoteMeta(delims[1]),
			regexp.QuoteMeta(delims[2]), strings.Join(exclusion, "|"), regexp.QuoteMeta(tagRight),
		),
	)

	return tokenMatcher
}
|