liquid/parser/scanner.go

package parser

import (
	"fmt"
	"regexp"
	"strings"
)

// Scan breaks a string into a sequence of Tokens.
//
// data is the template text to scan. loc is the source location stamped on
// the first token; its LineNo is advanced by the number of newlines consumed
// as scanning proceeds, so every token carries the line it starts on. delims
// lists the four delimiters in the order object-left, object-right, tag-left,
// tag-right; when it does not hold exactly four entries the defaults
// "{{", "}}", "{%", "%}" are used.
//
// Text between matches becomes TextTokenType tokens. An object match becomes
// an ObjTokenType token whose Args is the enclosed expression. A tag match
// becomes a TagTokenType token with Name set and, when present, Args.
// TrimLeft / TrimRight record a "-" placed directly inside the left / right
// delimiter.
func Scan(data string, loc SourceLoc, delims []string) (tokens []Token) {
	// Apply defaults
	if len(delims) != 4 {
		delims = []string{"{{", "}}", "{%", "%}"}
	}
	tokenMatcher := formTokenMatcher(delims)

	// TODO error on unterminated {{ and {%
	// TODO probably an error when a tag contains a {{ or {%, at least outside of a string
	p, pe := 0, len(data)
	for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {
		ts, te := m[0], m[1]
		if p < ts {
			// Literal text that precedes this object/tag.
			text := data[p:ts]
			tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: text})
			loc.LineNo += strings.Count(text, "\n")
		}
		source := data[ts:te]

		// Decide the token kind from the capture group that participated in
		// the match rather than from the literal prefix: when one left
		// delimiter is a prefix of the other, a prefix comparison can pick
		// the wrong branch and slice with the -1 indices of an unmatched
		// group.
		switch {
		case m[2] >= 0: // group 1: object expression
			left, right := delims[0], delims[1]
			tokens = append(tokens, Token{
				Type:      ObjTokenType,
				SourceLoc: loc,
				Source:    source,
				Args:      data[m[2]:m[3]],
				// The matcher guarantees source = left + at least one
				// character + right, so both indices are in range for
				// delimiters of any length.
				TrimLeft:  source[len(left)] == '-',
				TrimRight: source[len(source)-len(right)-1] == '-',
			})
		case m[4] >= 0: // group 2: tag name
			left, right := delims[2], delims[3]
			tok := Token{
				Type:      TagTokenType,
				SourceLoc: loc,
				Source:    source,
				Name:      data[m[4]:m[5]],
				TrimLeft:  source[len(left)] == '-',
				TrimRight: source[len(source)-len(right)-1] == '-',
			}
			// Group 3 (tag arguments) is optional; -1 marks "not present".
			if m[6] >= 0 {
				tok.Args = data[m[6]:m[7]]
			}
			tokens = append(tokens, tok)
		}
		loc.LineNo += strings.Count(source, "\n")
		p = te
	}
	if p < pe {
		// Trailing literal text after the last match.
		tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})
	}
	return tokens
}

// formTokenMatcher compiles the regular expression that recognises objects and
// tags for the given delimiters.
//
// delims must hold, in order: object-left, object-right, tag-left, tag-right.
// The resulting expression has three capture groups: (1) the object
// expression, (2) the tag name, and (3) the optional tag arguments.
// It panics (via regexp.MustCompile) if the assembled pattern is invalid.
func formTokenMatcher(delims []string) *regexp.Regexp {
	// On ending a tag we need to exclude anything that appears to be ending a tag that's nested
	// inside the tag. We form the exclusion expression here.
	// For example, if delims is default the exclusion expression is equivalent to "[^%]|%[^}]".
	// If tagRight is "TAG!RIGHT" then expression is
	// [^T]|T[^A]|TA[^G]|TAG[^!]|TAG![^R]|TAG!R[^I]|TAG!RI[^G]|TAG!RIG[^H]|TAG!RIGH[^T]
	//
	// Both the literal prefix and the excluded character are passed through
	// QuoteMeta so that delimiters containing regex metacharacters (e.g. "?>")
	// still yield a valid pattern.
	tagRight := delims[3]
	exclusion := make([]string, 0, len(tagRight))
	for idx, val := range tagRight {
		// idx is a byte offset into tagRight, not an element count, so it is
		// only used to slice the prefix — never to index into exclusion.
		exclusion = append(exclusion,
			regexp.QuoteMeta(tagRight[:idx])+"[^"+regexp.QuoteMeta(string(val))+"]")
	}

	tokenMatcher := regexp.MustCompile(
		fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|%s-?\s*(\w+)(?:\s+((?:%v)+?))?\s*-?%s`,
			// QuoteMeta will escape any of these that are regex commands
			regexp.QuoteMeta(delims[0]), regexp.QuoteMeta(delims[1]),
			regexp.QuoteMeta(delims[2]), strings.Join(exclusion, "|"), regexp.QuoteMeta(tagRight),
		),
	)

	return tokenMatcher
}
Split package render->parser 2017-07-07 11:41:37 +02:00			`package parser`
Initial 2017-06-25 17:23:20 +02:00
			`import (`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`"fmt"`
Initial 2017-06-25 17:23:20 +02:00			`"regexp"`
Record source line number 2017-06-30 14:42:11 +02:00			`"strings"`
Initial 2017-06-25 17:23:20 +02:00			`)`

Rename chunk -> token 2017-07-09 17:18:35 +02:00			`// Scan breaks a string into a sequence of Tokens.`
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`func Scan(data string, loc SourceLoc, delims []string) (tokens []Token) {`

			`// Apply defaults`
			`if len(delims) != 4 {`
			`delims = []string{"{{", "}}", "{%", "%}"}`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`}`
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`tokenMatcher := formTokenMatcher(delims)`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00
Docs 2017-06-26 18:41:41 +02:00			`// TODO error on unterminated {{ and {%`
			`// TODO probably an error when a tag contains a {{ or {%, at least outside of a string`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`p, pe := 0, len(data)`
Rename chunk -> token 2017-07-09 17:18:35 +02:00			`for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {`
Initial 2017-06-25 17:23:20 +02:00			`ts, te := m[0], m[1]`
			`if p < ts {`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})`
			`loc.LineNo += strings.Count(data[p:ts], "\n")`
Initial 2017-06-25 17:23:20 +02:00			`}`
Record source line number 2017-06-30 14:42:11 +02:00			`source := data[ts:te]`
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`switch {`
			`case data[ts:ts+len(delims[0])] == delims[0]:`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tok := Token{`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Type: ObjTokenType,`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`SourceLoc: loc,`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Source: source,`
			`Args: data[m[2]:m[3]],`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`TrimLeft: source[2] == '-',`
			`TrimRight: source[len(source)-3] == '-',`
			`}`
			`tokens = append(tokens, tok)`
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`case data[ts:ts+len(delims[2])] == delims[2]:`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tok := Token{`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Type: TagTokenType,`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`SourceLoc: loc,`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Source: source,`
			`Name: data[m[4]:m[5]],`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`TrimLeft: source[2] == '-',`
			`TrimRight: source[len(source)-3] == '-',`
Implement break, continue 2017-06-29 18:20:16 +02:00			`}`
Initial 2017-06-25 17:23:20 +02:00			`if m[6] > 0 {`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tok.Args = data[m[6]:m[7]]`
Initial 2017-06-25 17:23:20 +02:00			`}`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tokens = append(tokens, tok)`
Initial 2017-06-25 17:23:20 +02:00			`}`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`loc.LineNo += strings.Count(source, "\n")`
Implement {% else %}, {% elsif %} 2017-06-25 23:26:14 +02:00			`p = te`
Initial 2017-06-25 17:23:20 +02:00			`}`
			`if p < pe {`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})`
Initial 2017-06-25 17:23:20 +02:00			`}`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`return tokens`
Initial 2017-06-25 17:23:20 +02:00			`}`
Add comments and update tests 2017-07-24 03:00:05 +02:00
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`func formTokenMatcher(delims []string) *regexp.Regexp {`
			`// On ending a tag we need to exclude anything that appears to be ending a tag that's nested`
Follow go style guide re declaring empty slices 2017-09-03 18:13:20 +02:00			`// inside the tag. We form the exclusion expression here.`
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`// For example, if delims is default the exclusion expression is "[^%]\|%[^}]".`
			`// If tagRight is "TAG!RIGHT" then expression is`
			`// [^T]\|T[^A]\|TA[^G]\|TAG[^!]\|TAG![^R]\|TAG!R[^I]\|TAG!RI[^G]\|TAG!RIG[^H]\|TAG!RIGH[^T]`
Follow go style guide re declaring empty slices 2017-09-03 18:13:20 +02:00			`var exclusion []string`
Support delimiters of any length 2017-07-24 04:38:02 +02:00			`for idx, val := range delims[3] {`
			`exclusion = append(exclusion, "[^"+string(val)+"]")`
			`if idx > 0 {`
			`exclusion[idx] = delims[3][0:idx] + exclusion[idx]`
			`}`
			`}`

			`tokenMatcher := regexp.MustCompile(`
			fmt.Sprintf(`%s-?\s(.+?)\s-?%s\|%s-?\s(\w+)(?:\s+((?:%v)+?))?\s-?%s`,
			`// QuoteMeta will escape any of these that are regex commands`
			`regexp.QuoteMeta(delims[0]), regexp.QuoteMeta(delims[1]),`
			`regexp.QuoteMeta(delims[2]), strings.Join(exclusion, "\|"), regexp.QuoteMeta(delims[3]),`
			`),`
			`)`

			`return tokenMatcher`
Add comments and update tests 2017-07-24 03:00:05 +02:00			`}`