2017-07-07 11:41:37 +02:00
|
|
|
package parser
|
2017-06-25 17:23:20 +02:00
|
|
|
|
|
|
|
import (
|
2017-07-21 04:32:06 +02:00
|
|
|
"fmt"
|
2017-06-25 17:23:20 +02:00
|
|
|
"regexp"
|
2017-06-30 14:42:11 +02:00
|
|
|
"strings"
|
2017-06-25 17:23:20 +02:00
|
|
|
)
|
|
|
|
|
2017-07-09 17:18:35 +02:00
|
|
|
// Scan breaks a string into a sequence of Tokens.
|
2017-07-21 04:32:06 +02:00
|
|
|
func Scan(data string, loc SourceLoc, delims []byte) (tokens []Token) {
|
2017-07-24 03:00:05 +02:00
|
|
|
// delims = {, }, % => delimiters = {{, }}, {%, %}
|
2017-07-21 04:32:06 +02:00
|
|
|
if len(delims) != 3 {
|
|
|
|
delims = []byte{'{', '}', '%'}
|
|
|
|
}
|
2017-07-24 03:00:05 +02:00
|
|
|
delimiters := formFullDelimiters(delims)
|
|
|
|
tokenMatcher := regexp.MustCompile(
|
|
|
|
fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|%s-?\s*(\w+)(?:\s+((?:[^%%]|%%[^}])+?))?\s*-?%s`,
|
|
|
|
// QuoteMeta will escape any of these that are regex commands
|
|
|
|
regexp.QuoteMeta(delimiters[0]), regexp.QuoteMeta(delimiters[1]),
|
|
|
|
regexp.QuoteMeta(delimiters[2]), regexp.QuoteMeta(delimiters[3]),
|
|
|
|
),
|
|
|
|
)
|
2017-07-21 04:32:06 +02:00
|
|
|
|
2017-06-26 18:41:41 +02:00
|
|
|
// TODO error on unterminated {{ and {%
|
|
|
|
// TODO probably an error when a tag contains a {{ or {%, at least outside of a string
|
2017-07-16 20:15:11 +02:00
|
|
|
p, pe := 0, len(data)
|
2017-07-09 17:18:35 +02:00
|
|
|
for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {
|
2017-06-25 17:23:20 +02:00
|
|
|
ts, te := m[0], m[1]
|
|
|
|
if p < ts {
|
2017-07-16 20:15:11 +02:00
|
|
|
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})
|
|
|
|
loc.LineNo += strings.Count(data[p:ts], "\n")
|
2017-06-25 17:23:20 +02:00
|
|
|
}
|
2017-06-30 14:42:11 +02:00
|
|
|
source := data[ts:te]
|
2017-06-25 17:23:20 +02:00
|
|
|
switch data[ts+1] {
|
2017-07-21 04:32:06 +02:00
|
|
|
case delims[0]:
|
2017-07-16 20:15:11 +02:00
|
|
|
tok := Token{
|
2017-07-14 16:38:30 +02:00
|
|
|
Type: ObjTokenType,
|
2017-07-16 20:15:11 +02:00
|
|
|
SourceLoc: loc,
|
2017-07-14 16:38:30 +02:00
|
|
|
Source: source,
|
|
|
|
Args: data[m[2]:m[3]],
|
2017-07-16 20:15:11 +02:00
|
|
|
TrimLeft: source[2] == '-',
|
|
|
|
TrimRight: source[len(source)-3] == '-',
|
|
|
|
}
|
|
|
|
tokens = append(tokens, tok)
|
2017-07-21 04:32:06 +02:00
|
|
|
case delims[2]:
|
2017-07-16 20:15:11 +02:00
|
|
|
tok := Token{
|
2017-07-14 16:38:30 +02:00
|
|
|
Type: TagTokenType,
|
2017-07-16 20:15:11 +02:00
|
|
|
SourceLoc: loc,
|
2017-07-14 16:38:30 +02:00
|
|
|
Source: source,
|
|
|
|
Name: data[m[4]:m[5]],
|
2017-07-16 20:15:11 +02:00
|
|
|
TrimLeft: source[2] == '-',
|
|
|
|
TrimRight: source[len(source)-3] == '-',
|
2017-06-29 18:20:16 +02:00
|
|
|
}
|
2017-06-25 17:23:20 +02:00
|
|
|
if m[6] > 0 {
|
2017-07-16 20:15:11 +02:00
|
|
|
tok.Args = data[m[6]:m[7]]
|
2017-06-25 17:23:20 +02:00
|
|
|
}
|
2017-07-16 20:15:11 +02:00
|
|
|
tokens = append(tokens, tok)
|
2017-06-25 17:23:20 +02:00
|
|
|
}
|
2017-07-16 20:15:11 +02:00
|
|
|
loc.LineNo += strings.Count(source, "\n")
|
2017-06-25 23:26:14 +02:00
|
|
|
p = te
|
2017-06-25 17:23:20 +02:00
|
|
|
}
|
|
|
|
if p < pe {
|
2017-07-16 20:15:11 +02:00
|
|
|
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:]})
|
2017-06-25 17:23:20 +02:00
|
|
|
}
|
2017-07-16 20:15:11 +02:00
|
|
|
return tokens
|
2017-06-25 17:23:20 +02:00
|
|
|
}
|
2017-07-24 03:00:05 +02:00
|
|
|
|
|
|
|
// formFullDelimiters expands the single-byte delimiter characters into the
// four full delimiter strings used by the token matcher.
//
// delims is read as {opening character, closing character, tag marker}. The
// result is, in order: object open, object close, tag open, tag close. With
// the default '{', '}', '%' that is "{{", "}}", "{%" and "%}".
func formFullDelimiters(delims []byte) []string {
	opener, closer, marker := delims[0], delims[1], delims[2]

	// pair joins two delimiter bytes into one two-byte string.
	pair := func(first, second byte) string {
		return string([]byte{first, second})
	}

	return []string{
		pair(opener, opener), // object open
		pair(closer, closer), // object close
		pair(opener, marker), // tag open
		pair(marker, closer), // tag close
	}
}
|