liquid/parser/scanner.go

package parser

import (
	"fmt"
	"regexp"
	"strings"
)

// Scan breaks a string into a sequence of Tokens.
//
// data is the template source. loc is the source location attached to the
// first token; its LineNo is advanced by the number of newlines consumed, so
// every Token carries the line on which it starts.
//
// delims holds three single-byte delimiter characters: open, close and tag.
// The defaults '{', '}', '%' yield the object delimiters "{{" and "}}" and the
// tag delimiters "{%" and "%}". A slice whose length is not 3 selects the
// defaults.
//
// Text between (and around) matches is returned as TextTokenType tokens,
// objects as ObjTokenType tokens with Args set, and tags as TagTokenType
// tokens with Name and, when present, Args set. Input that does not form a
// complete object or tag stays inside the surrounding text token. Scan returns
// nil when data is empty.
func Scan(data string, loc SourceLoc, delims []byte) (tokens []Token) {
	// delims = {, }, % => delimiters = {{, }}, {%, %}
	if len(delims) != 3 {
		delims = []byte{'{', '}', '%'}
	}
	delimiters := formFullDelimiters(delims)

	// QuoteMeta will escape any of these that are regex commands
	objLeft, objRight := regexp.QuoteMeta(delimiters[0]), regexp.QuoteMeta(delimiters[1])
	tagLeft, tagRight := regexp.QuoteMeta(delimiters[2]), regexp.QuoteMeta(delimiters[3])
	// Tag arguments may contain anything except the closing tag delimiter.
	// Build that exclusion from the two bytes of the configured delimiter
	// rather than a hard-coded "%}", so custom delimiters behave like the
	// defaults.
	tagChar := regexp.QuoteMeta(delimiters[3][:1])
	closeChar := regexp.QuoteMeta(delimiters[3][1:])
	tokenMatcher := regexp.MustCompile(
		fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|%s-?\s*(\w+)(?:\s+((?:[^%s]|%s[^%s])+?))?\s*-?%s`,
			objLeft, objRight, tagLeft, tagChar, tagChar, closeChar, tagRight,
		),
	)

	// TODO error on unterminated {{ and {%
	// TODO probably an error when a tag contains a {{ or {%, at least outside of a string
	p, pe := 0, len(data)
	for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {
		ts, te := m[0], m[1]
		if p < ts {
			// Literal text between the previous match and this one.
			tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})
			loc.LineNo += strings.Count(data[p:ts], "\n")
		}
		source := data[ts:te]
		// Dispatch on which alternative of the pattern matched: an unmatched
		// capture group is reported with index -1.
		switch {
		case m[2] >= 0:
			tok := Token{
				Type:      ObjTokenType,
				SourceLoc: loc,
				Source:    source,
				Args:      data[m[2]:m[3]],
				// A '-' right after the opening / right before the closing
				// two-byte delimiter requests whitespace trimming.
				TrimLeft:  source[2] == '-',
				TrimRight: source[len(source)-3] == '-',
			}
			tokens = append(tokens, tok)
		case m[4] >= 0:
			tok := Token{
				Type:      TagTokenType,
				SourceLoc: loc,
				Source:    source,
				Name:      data[m[4]:m[5]],
				TrimLeft:  source[2] == '-',
				TrimRight: source[len(source)-3] == '-',
			}
			// The argument group is optional; leave Args empty when absent.
			if m[6] >= 0 {
				tok.Args = data[m[6]:m[7]]
			}
			tokens = append(tokens, tok)
		}
		loc.LineNo += strings.Count(source, "\n")
		p = te
	}
	if p < pe {
		// Trailing text after the last match (or the whole input if none).
		tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})
	}
	return tokens
}

// formFullDelimiters expands the three single-byte delimiter characters into the four two-byte
// delimiter strings, returned in the order: object open, object close, tag open, tag close.
func formFullDelimiters(delims []byte) []string {
	// With the default delims '{', '}', '%' the result is "{{", "}}", "{%", "%}".
	open, shut, tag := delims[0], delims[1], delims[2]
	return []string{
		string([]byte{open, open}),
		string([]byte{shut, shut}),
		string([]byte{open, tag}),
		string([]byte{tag, shut}),
	}
}
Split package render->parser 2017-07-07 11:41:37 +02:00			`package parser`
Initial 2017-06-25 17:23:20 +02:00
			`import (`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`"fmt"`
Initial 2017-06-25 17:23:20 +02:00			`"regexp"`
Record source line number 2017-06-30 14:42:11 +02:00			`"strings"`
Initial 2017-06-25 17:23:20 +02:00			`)`

Rename chunk -> token 2017-07-09 17:18:35 +02:00			`// Scan breaks a string into a sequence of Tokens.`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`func Scan(data string, loc SourceLoc, delims []byte) (tokens []Token) {`
Add comments and update tests 2017-07-24 03:00:05 +02:00			`// delims = {, }, % => delimiters = {{, }}, {%, %}`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`if len(delims) != 3 {`
			`delims = []byte{'{', '}', '%'}`
			`}`
Add comments and update tests 2017-07-24 03:00:05 +02:00			`delimiters := formFullDelimiters(delims)`
			`tokenMatcher := regexp.MustCompile(`
			fmt.Sprintf(`%s-?\s(.+?)\s-?%s\|%s-?\s(\w+)(?:\s+((?:[^%%]\|%%[^}])+?))?\s-?%s`,
			`// QuoteMeta will escape any of these that are regex commands`
			`regexp.QuoteMeta(delimiters[0]), regexp.QuoteMeta(delimiters[1]),`
			`regexp.QuoteMeta(delimiters[2]), regexp.QuoteMeta(delimiters[3]),`
			`),`
			`)`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00
Docs 2017-06-26 18:41:41 +02:00			`// TODO error on unterminated {{ and {%`
			`// TODO probably an error when a tag contains a {{ or {%, at least outside of a string`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`p, pe := 0, len(data)`
Rename chunk -> token 2017-07-09 17:18:35 +02:00			`for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {`
Initial 2017-06-25 17:23:20 +02:00			`ts, te := m[0], m[1]`
			`if p < ts {`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})`
			`loc.LineNo += strings.Count(data[p:ts], "\n")`
Initial 2017-06-25 17:23:20 +02:00			`}`
Record source line number 2017-06-30 14:42:11 +02:00			`source := data[ts:te]`
Initial 2017-06-25 17:23:20 +02:00			`switch data[ts+1] {`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`case delims[0]:`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tok := Token{`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Type: ObjTokenType,`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`SourceLoc: loc,`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Source: source,`
			`Args: data[m[2]:m[3]],`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`TrimLeft: source[2] == '-',`
			`TrimRight: source[len(source)-3] == '-',`
			`}`
			`tokens = append(tokens, tok)`
Add setting to customise delimiters 2017-07-21 04:32:06 +02:00			`case delims[2]:`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tok := Token{`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Type: TagTokenType,`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`SourceLoc: loc,`
Source location is an initialization parameter Fixes a race condition 2017-07-14 16:38:30 +02:00			`Source: source,`
			`Name: data[m[4]:m[5]],`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`TrimLeft: source[2] == '-',`
			`TrimRight: source[len(source)-3] == '-',`
Implement break, continue 2017-06-29 18:20:16 +02:00			`}`
Initial 2017-06-25 17:23:20 +02:00			`if m[6] > 0 {`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tok.Args = data[m[6]:m[7]]`
Initial 2017-06-25 17:23:20 +02:00			`}`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tokens = append(tokens, tok)`
Initial 2017-06-25 17:23:20 +02:00			`}`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`loc.LineNo += strings.Count(source, "\n")`
Implement {% else %}, {% elsif %} 2017-06-25 23:26:14 +02:00			`p = te`
Initial 2017-06-25 17:23:20 +02:00			`}`
			`if p < pe {`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})`
Initial 2017-06-25 17:23:20 +02:00			`}`
Scan whitespace control 2017-07-16 20:15:11 +02:00			`return tokens`
Initial 2017-06-25 17:23:20 +02:00			`}`
Add comments and update tests 2017-07-24 03:00:05 +02:00
			`// formFullDelimiters converts the single character byte delimiters into the full string actual`
			`// delimiters.`
			`func formFullDelimiters(delims []byte) []string {`
			`// Configure the token matcher to respect the delimiters passed to it. The default delims are '{',`
			`// '}', '%' which turn into "{{" and "}}" for objects and "{%" and "%}" for tags`
			`fullDelimiters := make([]string, 4, 4)`
			`fullDelimiters[0] = string([]byte{delims[0], delims[0]})`
			`fullDelimiters[1] = string([]byte{delims[1], delims[1]})`
			`fullDelimiters[2] = string([]byte{delims[0], delims[2]})`
			`fullDelimiters[3] = string([]byte{delims[2], delims[1]})`
			`return fullDelimiters`
			`}`