
Merge pull request #322 from russross/v2-perf-tweaks

V2 perf tweaks
Authored by Vytautas Šaltenis on 2017-02-02 17:08:19 +02:00, committed by GitHub
commit ad7f7c56d5
14 changed files with 494 additions and 324 deletions


@ -5,10 +5,9 @@
language: go
go:
- 1.2
- 1.3
- 1.4
- 1.5
- 1.6
- 1.7
install:
- go get -d -t -v ./...

block.go (190 changed lines)

@ -29,17 +29,12 @@ const (
var (
reBackslashOrAmp = regexp.MustCompile("[\\&]")
reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
reTrailingWhitespace = regexp.MustCompile("(\n *)+$")
)
// Parse block-level data.
// Note: this function and many that it calls assume that
// the input buffer ends with a newline.
func (p *parser) block(data []byte) {
if len(data) == 0 || data[len(data)-1] != '\n' {
panic("block input is missing terminating newline")
}
// this is called recursively: enforce a maximum depth
if p.nesting >= p.maxNesting {
return
@ -131,7 +126,7 @@ func (p *parser) block(data []byte) {
if p.isHRule(data) {
p.addBlock(HorizontalRule, nil)
var i int
for i = 0; data[i] != '\n'; i++ {
for i = 0; i < len(data) && data[i] != '\n'; i++ {
}
data = data[i:]
continue
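
The pattern above recurs throughout the rest of block.go: with the terminating-newline guarantee gone (the preprocess pass is deleted in markdown.go further down), every end-of-line scan must also stop at len(data). A minimal sketch of the idiom, not part of this commit:

	// scanToNextLine shows the bounds-checked scan idiom this change applies.
	func scanToNextLine(data []byte, i int) int {
		for i < len(data) && data[i] != '\n' { // stop at end of buffer or newline
			i++
		}
		if i < len(data) { // step over the newline only if one is present
			i++
		}
		return i
	}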
@ -216,10 +211,10 @@ func (p *parser) isPrefixHeader(data []byte) bool {
if p.flags&SpaceHeaders != 0 {
level := 0
for level < 6 && data[level] == '#' {
for level < 6 && level < len(data) && data[level] == '#' {
level++
}
if data[level] != ' ' {
if level == len(data) || data[level] != ' ' {
return false
}
}
@ -228,7 +223,7 @@ func (p *parser) isPrefixHeader(data []byte) bool {
func (p *parser) prefixHeader(data []byte) int {
level := 0
for level < 6 && data[level] == '#' {
for level < 6 && level < len(data) && data[level] == '#' {
level++
}
i := skipChar(data, level, ' ')
@ -277,7 +272,7 @@ func (p *parser) isUnderlinedHeader(data []byte) int {
if data[0] == '=' {
i := skipChar(data, 1, '=')
i = skipChar(data, i, ' ')
if data[i] == '\n' {
if i < len(data) && data[i] == '\n' {
return 1
}
return 0
@ -287,7 +282,7 @@ func (p *parser) isUnderlinedHeader(data []byte) int {
if data[0] == '-' {
i := skipChar(data, 1, '-')
i = skipChar(data, i, ' ')
if data[i] == '\n' {
if i < len(data) && data[i] == '\n' {
return 2
}
return 0
@ -419,8 +414,8 @@ func (p *parser) html(data []byte, doRender bool) int {
}
func finalizeHTMLBlock(block *Node) {
block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
block.content = []byte{}
block.Literal = block.content
block.content = nil
}
// HTML comment, lax form
@ -445,6 +440,9 @@ func (p *parser) htmlComment(data []byte, doRender bool) int {
// HR, which is the only self-closing block tag considered
func (p *parser) htmlHr(data []byte, doRender bool) int {
if len(data) < 4 {
return 0
}
if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
return 0
}
@ -452,13 +450,11 @@ func (p *parser) htmlHr(data []byte, doRender bool) int {
// not an <hr> tag after all; at least not a valid one
return 0
}
i := 3
for data[i] != '>' && data[i] != '\n' {
for i < len(data) && data[i] != '>' && data[i] != '\n' {
i++
}
if data[i] == '>' {
if i < len(data) && data[i] == '>' {
i++
if j := p.isEmpty(data[i:]); j > 0 {
size := i + j
@ -473,13 +469,12 @@ func (p *parser) htmlHr(data []byte, doRender bool) int {
return size
}
}
return 0
}
func (p *parser) htmlFindTag(data []byte) (string, bool) {
i := 0
for isalnum(data[i]) {
for i < len(data) && isalnum(data[i]) {
i++
}
key := string(data[:i])
@ -536,7 +531,10 @@ func (*parser) isEmpty(data []byte) int {
return 0
}
}
return i + 1
if i < len(data) && data[i] == '\n' {
i++
}
return i
}
func (*parser) isHRule(data []byte) bool {
@ -555,7 +553,7 @@ func (*parser) isHRule(data []byte) bool {
// the whole line must be the char or whitespace
n := 0
for data[i] != '\n' {
for i < len(data) && data[i] != '\n' {
switch {
case data[i] == c:
n++
@ -571,8 +569,7 @@ func (*parser) isHRule(data []byte) bool {
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
// and returns the end index if so, or 0 otherwise. It also returns the marker found.
// If syntax is not nil, it gets set to the syntax specified in the fence line.
// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) {
i, size := 0, 0
// skip up to three spaces
@ -614,7 +611,7 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
i = skipChar(data, i, ' ')
if i >= len(data) {
if newlineOptional && i == len(data) {
if i == len(data) {
return i, marker
}
return 0, ""
@ -659,12 +656,11 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
i = skipChar(data, i, ' ')
if i >= len(data) || data[i] != '\n' {
if newlineOptional && i == len(data) {
if i == len(data) {
return i, marker
}
return 0, ""
}
return i + 1, marker // Take newline into account.
}
@ -673,7 +669,7 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
// If doRender is true, a final newline is mandatory to recognize the fenced code block.
func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
var syntax string
beg, marker := isFenceLine(data, &syntax, "", false)
beg, marker := isFenceLine(data, &syntax, "")
if beg == 0 || beg >= len(data) {
return 0
}
@ -686,8 +682,7 @@ func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
// safe to assume beg < len(data)
// check for the end of the code block
newlineOptional := !doRender
fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
if fenceEnd != 0 {
beg += fenceEnd
break
@ -739,7 +734,7 @@ func finalizeCodeBlock(block *Node) {
block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
block.Literal = rest
} else {
block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
block.Literal = block.content
}
block.content = nil
}
@ -757,7 +752,7 @@ func (p *parser) table(data []byte) int {
for i < len(data) {
pipes, rowStart := 0, i
for ; data[i] != '\n'; i++ {
for ; i < len(data) && data[i] != '\n'; i++ {
if data[i] == '|' {
pipes++
}
@ -769,7 +764,9 @@ func (p *parser) table(data []byte) int {
}
// include the newline in data sent to tableRow
if i < len(data) && data[i] == '\n' {
i++
}
p.tableRow(data[rowStart:i], columns, false)
}
@ -788,7 +785,7 @@ func isBackslashEscaped(data []byte, i int) bool {
func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
i := 0
colCount := 1
for i = 0; data[i] != '\n'; i++ {
for i = 0; i < len(data) && data[i] != '\n'; i++ {
if data[i] == '|' && !isBackslashEscaped(data, i) {
colCount++
}
@ -800,7 +797,11 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
}
// include the newline in the data sent to tableRow
header := data[:i+1]
j := i
if j < len(data) && data[j] == '\n' {
j++
}
header := data[:j]
// column count ignores pipes at beginning or end of line
if data[0] == '|' {
@ -826,7 +827,7 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
// and trailing | optional on last column
col := 0
for data[i] != '\n' {
for i < len(data) && data[i] != '\n' {
dashes := 0
if data[i] == ':' {
@ -834,19 +835,21 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
columns[col] |= TableAlignmentLeft
dashes++
}
for data[i] == '-' {
for i < len(data) && data[i] == '-' {
i++
dashes++
}
if data[i] == ':' {
if i < len(data) && data[i] == ':' {
i++
columns[col] |= TableAlignmentRight
dashes++
}
for data[i] == ' ' {
for i < len(data) && data[i] == ' ' {
i++
}
if i == len(data) {
return
}
// end of column test is messy
switch {
case dashes < 3:
@ -857,12 +860,12 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
// marker found, now skip past trailing whitespace
col++
i++
for data[i] == ' ' {
for i < len(data) && data[i] == ' ' {
i++
}
// trailing junk found after last column
if col >= colCount && data[i] != '\n' {
if col >= colCount && i < len(data) && data[i] != '\n' {
return
}
@ -885,7 +888,10 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
p.addBlock(TableHead, nil)
p.tableRow(header, columns, true)
size = i + 1
size = i
if size < len(data) && data[size] == '\n' {
size++
}
return
}
@ -898,13 +904,13 @@ func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
}
for col = 0; col < len(columns) && i < len(data); col++ {
for data[i] == ' ' {
for i < len(data) && data[i] == ' ' {
i++
}
cellStart := i
for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
i++
}
@ -913,7 +919,7 @@ func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
// skip the end-of-cell marker, possibly taking us past end of buffer
i++
for cellEnd > cellStart && data[cellEnd-1] == ' ' {
for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
cellEnd--
}
@ -935,11 +941,11 @@ func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
// returns blockquote prefix length
func (p *parser) quotePrefix(data []byte) int {
i := 0
for i < 3 && data[i] == ' ' {
for i < 3 && i < len(data) && data[i] == ' ' {
i++
}
if data[i] == '>' {
if data[i+1] == ' ' {
if i < len(data) && data[i] == '>' {
if i+1 < len(data) && data[i+1] == ' ' {
return i + 2
}
return i + 1
@ -969,7 +975,7 @@ func (p *parser) quote(data []byte) int {
// Step over whole lines, collecting them. While doing that, check for
// fenced code and if one's found, incorporate it altogether,
// irregardless of any contents inside it
for data[end] != '\n' {
for end < len(data) && data[end] != '\n' {
if p.flags&FencedCode != 0 {
if i := p.fencedCodeBlock(data[end:], false); i > 0 {
// -1 to compensate for the extra end++ after the loop:
@ -979,7 +985,9 @@ func (p *parser) quote(data []byte) int {
}
end++
}
if end < len(data) && data[end] == '\n' {
end++
}
if pre := p.quotePrefix(data[beg:]); pre > 0 {
// skip the prefix
beg += pre
@ -997,7 +1005,10 @@ func (p *parser) quote(data []byte) int {
// returns prefix length for block code
func (p *parser) codePrefix(data []byte) int {
if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
if len(data) >= 1 && data[0] == '\t' {
return 1
}
if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
return 4
}
return 0
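
To make the new behaviour concrete: a leading tab now yields a code prefix of 1, next to the existing four-space prefix of 4. A small test sketch, not part of this commit, leaning on the fact (visible above) that codePrefix reads no parser state:

	func TestCodePrefixTab(t *testing.T) {
		var p parser // codePrefix uses no parser state, so the zero value is enough here
		if got := p.codePrefix([]byte("\tindented")); got != 1 {
			t.Errorf("tab prefix: got %d, want 1", got)
		}
		if got := p.codePrefix([]byte("    indented")); got != 4 {
			t.Errorf("four-space prefix: got %d, want 4", got)
		}
		if got := p.codePrefix([]byte("  two spaces only")); got != 0 {
			t.Errorf("two-space prefix: got %d, want 0", got)
		}
	}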
@ -1009,10 +1020,12 @@ func (p *parser) code(data []byte) int {
i := 0
for i < len(data) {
beg := i
for data[i] != '\n' {
for i < len(data) && data[i] != '\n' {
i++
}
if i < len(data) && data[i] == '\n' {
i++
}
blankline := p.isEmpty(data[beg:i]) > 0
if pre := p.codePrefix(data[beg:i]); pre > 0 {
@ -1023,7 +1036,7 @@ func (p *parser) code(data []byte) int {
break
}
// verbatim copy to the working buffeu
// verbatim copy to the working buffer
if blankline {
work.WriteByte('\n')
} else {
@ -1053,15 +1066,16 @@ func (p *parser) code(data []byte) int {
// returns unordered list item prefix
func (p *parser) uliPrefix(data []byte) int {
i := 0
// start with up to 3 spaces
for i < 3 && data[i] == ' ' {
for i < len(data) && i < 3 && data[i] == ' ' {
i++
}
// need a *, +, or - followed by a space
if i >= len(data)-1 {
return 0
}
// need one of {'*', '+', '-'} followed by a space or a tab
if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
data[i+1] != ' ' {
(data[i+1] != ' ' && data[i+1] != '\t') {
return 0
}
return i + 2
@ -1072,18 +1086,21 @@ func (p *parser) oliPrefix(data []byte) int {
i := 0
// start with up to 3 spaces
for i < 3 && data[i] == ' ' {
for i < 3 && i < len(data) && data[i] == ' ' {
i++
}
// count the digits
start := i
for data[i] >= '0' && data[i] <= '9' {
for i < len(data) && data[i] >= '0' && data[i] <= '9' {
i++
}
if start == i || i >= len(data)-1 {
return 0
}
// we need >= 1 digits followed by a dot and a space
if start == i || data[i] != '.' || data[i+1] != ' ' {
// we need >= 1 digits followed by a dot and a space or a tab
if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
return 0
}
return i + 2
@ -1091,13 +1108,15 @@ func (p *parser) oliPrefix(data []byte) int {
// returns definition list item prefix
func (p *parser) dliPrefix(data []byte) int {
i := 0
// need a : followed by a spaces
if data[i] != ':' || data[i+1] != ' ' {
if len(data) < 2 {
return 0
}
for data[i] == ' ' {
i := 0
// need a ':' followed by a space or a tab
if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
return 0
}
for i < len(data) && data[i] == ' ' {
i++
}
return i + 2
@ -1175,9 +1194,13 @@ func finalizeList(block *Node) {
func (p *parser) listItem(data []byte, flags *ListType) int {
// keep track of the indentation of the first line
itemIndent := 0
if data[0] == '\t' {
itemIndent += 4
} else {
for itemIndent < 3 && data[itemIndent] == ' ' {
itemIndent++
}
}
var bulletChar byte = '*'
i := p.uliPrefix(data)
@ -1203,13 +1226,13 @@ func (p *parser) listItem(data []byte, flags *ListType) int {
}
// skip leading whitespace on first line
for data[i] == ' ' {
for i < len(data) && data[i] == ' ' {
i++
}
// find the end of the line
line := i
for i > 0 && data[i-1] != '\n' {
for i > 0 && i < len(data) && data[i-1] != '\n' {
i++
}
@ -1229,7 +1252,7 @@ gatherlines:
i++
// find the end of this line
for data[i-1] != '\n' {
for i < len(data) && data[i-1] != '\n' {
i++
}
@ -1243,11 +1266,18 @@ gatherlines:
// calculate the indentation
indent := 0
indentIndex := 0
if data[line] == '\t' {
indentIndex++
indent += 4
} else {
for indent < 4 && line+indent < i && data[line+indent] == ' ' {
indent++
indentIndex++
}
}
chunk := data[line+indent : i]
chunk := data[line+indentIndex : i]
// evaluate how this line fits in
switch {
@ -1288,7 +1318,7 @@ gatherlines:
if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
// is the next item still a part of this list?
next := i
for data[next] != '\n' {
for next < len(data) && data[next] != '\n' {
next++
}
for next < len(data)-1 && data[next] == '\n' {
@ -1316,7 +1346,7 @@ gatherlines:
}
// add the line into the working buffer without prefix
raw.Write(data[line+indent : i])
raw.Write(data[line+indentIndex : i])
line = i
}
@ -1364,8 +1394,11 @@ func (p *parser) renderParagraph(data []byte) {
beg++
}
end := len(data)
// trim trailing newline
end := len(data) - 1
if data[len(data)-1] == '\n' {
end--
}
// trim trailing spaces
for end > beg && data[end-1] == ' ' {
@ -1437,7 +1470,7 @@ func (p *parser) paragraph(data []byte) int {
block.HeaderID = id
// find the end of the underline
for data[i] != '\n' {
for i < len(data) && data[i] != '\n' {
i++
}
return i
@ -1470,7 +1503,8 @@ func (p *parser) paragraph(data []byte) int {
// if there's a definition list item, prev line is a definition term
if p.flags&DefinitionLists != 0 {
if p.dliPrefix(current) != 0 {
return p.list(data[prev:], ListTypeDefinition)
ret := p.list(data[prev:], ListTypeDefinition)
return ret
}
}
@ -1486,10 +1520,12 @@ func (p *parser) paragraph(data []byte) int {
}
// otherwise, scan to the beginning of the next line
for data[i] != '\n' {
i++
nl := bytes.IndexByte(data[i:], '\n')
if nl >= 0 {
i += nl + 1
} else {
i += len(data[i:])
}
i++
}
p.renderParagraph(data[:i])


@ -1661,14 +1661,14 @@ func TestIsFenceLine(t *testing.T) {
tests := []struct {
data []byte
syntaxRequested bool
newlineOptional bool
wantEnd int
wantMarker string
wantSyntax string
}{
{
data: []byte("```"),
wantEnd: 0,
wantEnd: 3,
wantMarker: "```",
},
{
data: []byte("```\nstuff here\n"),
@ -1685,23 +1685,15 @@ func TestIsFenceLine(t *testing.T) {
data: []byte("stuff here\n```\n"),
wantEnd: 0,
},
{
data: []byte("```"),
newlineOptional: true,
wantEnd: 3,
wantMarker: "```",
},
{
data: []byte("```"),
syntaxRequested: true,
newlineOptional: true,
wantEnd: 3,
wantMarker: "```",
},
{
data: []byte("``` go"),
syntaxRequested: true,
newlineOptional: true,
wantEnd: 6,
wantMarker: "```",
wantSyntax: "go",
@ -1713,7 +1705,7 @@ func TestIsFenceLine(t *testing.T) {
if test.syntaxRequested {
syntax = new(string)
}
end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
end, marker := isFenceLine(test.data, syntax, "```")
if got, want := end, test.wantEnd; got != want {
t.Errorf("got end %v, want %v", got, want)
}

esc.go (new file, 34 lines)

@ -0,0 +1,34 @@
package blackfriday
import (
"html"
"io"
)
var htmlEscaper = [256][]byte{
'&': []byte("&amp;"),
'<': []byte("&lt;"),
'>': []byte("&gt;"),
'"': []byte("&quot;"),
}
func escapeHTML(w io.Writer, s []byte) {
var start, end int
for end < len(s) {
escSeq := htmlEscaper[s[end]]
if escSeq != nil {
w.Write(s[start:end])
w.Write(escSeq)
start = end + 1
}
end++
}
if start < len(s) && end <= len(s) {
w.Write(s[start:end])
}
}
func escLink(w io.Writer, text []byte) {
unesc := html.UnescapeString(string(text))
escapeHTML(w, []byte(unesc))
}
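
A short test sketch, not part of this commit, of what escLink adds over plain escapeHTML: destinations are unescaped first, so a bare '&' and an already-escaped '&amp;' both come out as '&amp;' exactly once. It could sit next to TestEsc in esc_test.go below, which already imports bytes and testing:

	func TestEscLink(t *testing.T) {
		cases := [][2]string{
			{"/script?foo=1&bar=2", "/script?foo=1&amp;bar=2"},
			{"/script?foo=1&amp;bar=2", "/script?foo=1&amp;bar=2"},
		}
		for _, c := range cases {
			var b bytes.Buffer
			escLink(&b, []byte(c[0]))
			if b.String() != c[1] {
				t.Errorf("escLink(%q) = %q, want %q", c[0], b.String(), c[1])
			}
		}
	}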

esc_test.go (new file, 48 lines)

@ -0,0 +1,48 @@
package blackfriday
import (
"bytes"
"testing"
)
func TestEsc(t *testing.T) {
tests := []string{
"abc", "abc",
"a&c", "a&amp;c",
"<", "&lt;",
"[]:<", "[]:&lt;",
"Hello <!--", "Hello &lt;!--",
}
for i := 0; i < len(tests); i += 2 {
var b bytes.Buffer
escapeHTML(&b, []byte(tests[i]))
if !bytes.Equal(b.Bytes(), []byte(tests[i+1])) {
t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]",
tests[i], tests[i+1], b.String())
}
}
}
func BenchmarkEscapeHTML(b *testing.B) {
tests := [][]byte{
[]byte(""),
[]byte("AT&T has an ampersand in their name."),
[]byte("AT&amp;T is another way to write it."),
[]byte("This & that."),
[]byte("4 < 5."),
[]byte("6 > 5."),
[]byte("Here's a [link] [1] with an ampersand in the URL."),
[]byte("Here's a link with an ampersand in the link text: [AT&T] [2]."),
[]byte("Here's an inline [link](/script?foo=1&bar=2)."),
[]byte("Here's an inline [link](</script?foo=1&bar=2>)."),
[]byte("[1]: http://example.com/?foo=1&bar=2"),
[]byte("[2]: http://att.com/ \"AT&T\""),
}
var buf bytes.Buffer
for n := 0; n < b.N; n++ {
for _, t := range tests {
escapeHTML(&buf, t)
buf.Reset()
}
}
}
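
For a sense of what the single-pass table lookup avoids, a hedged companion benchmark (not part of this commit) against the html.EscapeString round trip that the removed esc()/escCode() helpers in html.go relied on; it would need an extra "html" import in this file:

	var escBenchInput = []byte(`AT&T says 4 < 5, 6 > 5, and "quotes" need escaping.`)

	func BenchmarkTableDrivenEscape(b *testing.B) {
		var buf bytes.Buffer
		for n := 0; n < b.N; n++ {
			buf.Reset()
			escapeHTML(&buf, escBenchInput)
		}
	}

	func BenchmarkStdlibEscapeString(b *testing.B) {
		for n := 0; n < b.N; n++ {
			_ = html.EscapeString(string(escBenchInput))
		}
	}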

html.go (308 changed lines)

@ -18,7 +18,6 @@ package blackfriday
import (
"bytes"
"fmt"
"html"
"io"
"regexp"
"strings"
@ -308,22 +307,24 @@ func isSmartypantable(node *Node) bool {
}
func appendLanguageAttr(attrs []string, info []byte) []string {
infoWords := bytes.Split(info, []byte("\t "))
if len(infoWords) > 0 && len(infoWords[0]) > 0 {
attrs = append(attrs, fmt.Sprintf("class=\"language-%s\"", infoWords[0]))
}
if len(info) == 0 {
return attrs
}
endOfLang := bytes.IndexAny(info, "\t ")
if endOfLang < 0 {
endOfLang = len(info)
}
return append(attrs, fmt.Sprintf("class=\"language-%s\"", info[:endOfLang]))
}
func tag(name string, attrs []string, selfClosing bool) []byte {
result := "<" + name
if attrs != nil && len(attrs) > 0 {
result += " " + strings.Join(attrs, " ")
func (r *HTMLRenderer) tag(w io.Writer, name []byte, attrs []string) {
w.Write(name)
if len(attrs) > 0 {
w.Write(spaceBytes)
w.Write([]byte(strings.Join(attrs, " ")))
}
if selfClosing {
result += " /"
}
return []byte(result + ">")
w.Write(gtBytes)
r.lastOutputLen = 1
}
func footnoteRef(prefix string, node *Node) []byte {
@ -371,17 +372,6 @@ func cellAlignment(align CellAlignFlags) string {
}
}
func esc(text []byte) []byte {
unesc := []byte(html.UnescapeString(string(text)))
return escCode(unesc)
}
func escCode(text []byte) []byte {
e1 := []byte(html.EscapeString(string(text)))
e2 := bytes.Replace(e1, []byte("&#34;"), []byte("&quot;"), -1)
return bytes.Replace(e2, []byte("&#39;"), []byte{'\''}, -1)
}
func (r *HTMLRenderer) out(w io.Writer, text []byte) {
if r.disableTags > 0 {
w.Write(htmlTagRe.ReplaceAll(text, []byte{}))
@ -393,7 +383,102 @@ func (r *HTMLRenderer) out(w io.Writer, text []byte) {
func (r *HTMLRenderer) cr(w io.Writer) {
if r.lastOutputLen > 0 {
r.out(w, []byte{'\n'})
r.out(w, nlBytes)
}
}
var (
nlBytes = []byte{'\n'}
gtBytes = []byte{'>'}
spaceBytes = []byte{' '}
)
var (
brTag = []byte("<br>")
brXHTMLTag = []byte("<br />")
emTag = []byte("<em>")
emCloseTag = []byte("</em>")
strongTag = []byte("<strong>")
strongCloseTag = []byte("</strong>")
delTag = []byte("<del>")
delCloseTag = []byte("</del>")
ttTag = []byte("<tt>")
ttCloseTag = []byte("</tt>")
aTag = []byte("<a")
aCloseTag = []byte("</a>")
preTag = []byte("<pre>")
preCloseTag = []byte("</pre>")
codeTag = []byte("<code>")
codeCloseTag = []byte("</code>")
pTag = []byte("<p>")
pCloseTag = []byte("</p>")
blockquoteTag = []byte("<blockquote>")
blockquoteCloseTag = []byte("</blockquote>")
hrTag = []byte("<hr>")
hrXHTMLTag = []byte("<hr />")
ulTag = []byte("<ul>")
ulCloseTag = []byte("</ul>")
olTag = []byte("<ol>")
olCloseTag = []byte("</ol>")
dlTag = []byte("<dl>")
dlCloseTag = []byte("</dl>")
liTag = []byte("<li>")
liCloseTag = []byte("</li>")
ddTag = []byte("<dd>")
ddCloseTag = []byte("</dd>")
dtTag = []byte("<dt>")
dtCloseTag = []byte("</dt>")
tableTag = []byte("<table>")
tableCloseTag = []byte("</table>")
tdTag = []byte("<td")
tdCloseTag = []byte("</td>")
thTag = []byte("<th")
thCloseTag = []byte("</th>")
theadTag = []byte("<thead>")
theadCloseTag = []byte("</thead>")
tbodyTag = []byte("<tbody>")
tbodyCloseTag = []byte("</tbody>")
trTag = []byte("<tr>")
trCloseTag = []byte("</tr>")
h1Tag = []byte("<h1")
h1CloseTag = []byte("</h1>")
h2Tag = []byte("<h2")
h2CloseTag = []byte("</h2>")
h3Tag = []byte("<h3")
h3CloseTag = []byte("</h3>")
h4Tag = []byte("<h4")
h4CloseTag = []byte("</h4>")
h5Tag = []byte("<h5")
h5CloseTag = []byte("</h5>")
h6Tag = []byte("<h6")
h6CloseTag = []byte("</h6>")
footnotesDivBytes = []byte("\n<div class=\"footnotes\">\n\n")
footnotesCloseDivBytes = []byte("\n</div>\n")
)
func headerTagsFromLevel(level int) ([]byte, []byte) {
switch level {
case 1:
return h1Tag, h1CloseTag
case 2:
return h2Tag, h2CloseTag
case 3:
return h3Tag, h3CloseTag
case 4:
return h4Tag, h4CloseTag
case 5:
return h5Tag, h5CloseTag
default:
return h6Tag, h6CloseTag
}
}
func (r *HTMLRenderer) outHRTag(w io.Writer) {
if r.Flags&UseXHTML == 0 {
r.out(w, hrTag)
} else {
r.out(w, hrXHTMLTag)
}
}
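
The preallocated tag byte slices and headerTagsFromLevel above exist so the renderer stops paying a fmt.Sprintf/string-concatenation allocation per node. A rough benchmark sketch, not part of this commit, assumed to live in a test file with fmt and testing imported:

	func BenchmarkHeaderTagSprintf(b *testing.B) {
		for n := 0; n < b.N; n++ {
			_ = []byte(fmt.Sprintf("<h%d>", 3)) // the old per-node allocation
		}
	}

	func BenchmarkHeaderTagLookup(b *testing.B) {
		for n := 0; n < b.N; n++ {
			openTag, closeTag := headerTagsFromLevel(3) // the new allocation-free lookup
			_, _ = openTag, closeTag
		}
	}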
@ -411,34 +496,44 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
attrs := []string{}
switch node.Type {
case Text:
node.Literal = esc(node.Literal)
if r.Flags&Smartypants != 0 {
node.Literal = r.sr.Process(node.Literal)
var tmp bytes.Buffer
escapeHTML(&tmp, node.Literal)
r.sr.Process(w, tmp.Bytes())
} else {
if node.Parent.Type == Link {
escLink(w, node.Literal)
} else {
escapeHTML(w, node.Literal)
}
}
r.out(w, node.Literal)
case Softbreak:
r.out(w, []byte{'\n'})
r.cr(w)
// TODO: make it configurable via out(renderer.softbreak)
case Hardbreak:
r.out(w, tag("br", nil, true))
if r.Flags&UseXHTML == 0 {
r.out(w, brTag)
} else {
r.out(w, brXHTMLTag)
}
r.cr(w)
case Emph:
if entering {
r.out(w, tag("em", nil, false))
r.out(w, emTag)
} else {
r.out(w, tag("/em", nil, false))
r.out(w, emCloseTag)
}
case Strong:
if entering {
r.out(w, tag("strong", nil, false))
r.out(w, strongTag)
} else {
r.out(w, tag("/strong", nil, false))
r.out(w, strongCloseTag)
}
case Del:
if entering {
r.out(w, tag("del", nil, false))
r.out(w, delTag)
} else {
r.out(w, tag("/del", nil, false))
r.out(w, delCloseTag)
}
case HTMLSpan:
if r.Flags&SkipHTML != 0 {
@ -450,30 +545,36 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
dest := node.LinkData.Destination
if needSkipLink(r.Flags, dest) {
if entering {
r.out(w, tag("tt", nil, false))
r.out(w, ttTag)
} else {
r.out(w, tag("/tt", nil, false))
r.out(w, ttCloseTag)
}
} else {
if entering {
dest = r.addAbsPrefix(dest)
//if (!(options.safe && potentiallyUnsafe(node.destination))) {
attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest)))
//}
var hrefBuf bytes.Buffer
hrefBuf.WriteString("href=\"")
escLink(&hrefBuf, dest)
hrefBuf.WriteByte('"')
attrs = append(attrs, hrefBuf.String())
if node.NoteID != 0 {
r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node))
break
}
attrs = appendLinkAttrs(attrs, r.Flags, dest)
if len(node.LinkData.Title) > 0 {
attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title)))
var titleBuff bytes.Buffer
titleBuff.WriteString("title=\"")
escapeHTML(&titleBuff, node.LinkData.Title)
titleBuff.WriteByte('"')
attrs = append(attrs, titleBuff.String())
}
r.out(w, tag("a", attrs, false))
r.tag(w, aTag, attrs)
} else {
if node.NoteID != 0 {
break
}
r.out(w, tag("/a", nil, false))
r.out(w, aCloseTag)
}
}
case Image:
@ -487,7 +588,9 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
//if options.safe && potentiallyUnsafe(dest) {
//out(w, `<img src="" alt="`)
//} else {
r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest))))
r.out(w, []byte(`<img src="`))
escLink(w, dest)
r.out(w, []byte(`" alt="`))
//}
}
r.disableTags++
@ -496,15 +599,15 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
if r.disableTags == 0 {
if node.LinkData.Title != nil {
r.out(w, []byte(`" title="`))
r.out(w, esc(node.LinkData.Title))
escapeHTML(w, node.LinkData.Title)
}
r.out(w, []byte(`" />`))
}
}
case Code:
r.out(w, tag("code", nil, false))
r.out(w, escCode(node.Literal))
r.out(w, tag("/code", nil, false))
r.out(w, codeTag)
escapeHTML(w, node.Literal)
r.out(w, codeCloseTag)
case Document:
break
case Paragraph:
@ -523,9 +626,9 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
if node.Parent.Type == BlockQuote && node.Prev == nil {
r.cr(w)
}
r.out(w, tag("p", attrs, false))
r.out(w, pTag)
} else {
r.out(w, tag("/p", attrs, false))
r.out(w, pCloseTag)
if !(node.Parent.Type == Item && node.Next == nil) {
r.cr(w)
}
@ -533,9 +636,9 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
case BlockQuote:
if entering {
r.cr(w)
r.out(w, tag("blockquote", attrs, false))
r.out(w, blockquoteTag)
} else {
r.out(w, tag("/blockquote", nil, false))
r.out(w, blockquoteCloseTag)
r.cr(w)
}
case HTMLBlock:
@ -546,7 +649,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.out(w, node.Literal)
r.cr(w)
case Header:
tagname := fmt.Sprintf("h%d", node.Level)
openTag, closeTag := headerTagsFromLevel(node.Level)
if entering {
if node.IsTitleblock {
attrs = append(attrs, `class="title"`)
@ -562,39 +665,42 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
attrs = append(attrs, fmt.Sprintf(`id="%s"`, id))
}
r.cr(w)
r.out(w, tag(tagname, attrs, false))
r.tag(w, openTag, attrs)
} else {
r.out(w, tag("/"+tagname, nil, false))
r.out(w, closeTag)
if !(node.Parent.Type == Item && node.Next == nil) {
r.cr(w)
}
}
case HorizontalRule:
r.cr(w)
r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0))
r.outHRTag(w)
r.cr(w)
case List:
tagName := "ul"
openTag := ulTag
closeTag := ulCloseTag
if node.ListFlags&ListTypeOrdered != 0 {
tagName = "ol"
openTag = olTag
closeTag = olCloseTag
}
if node.ListFlags&ListTypeDefinition != 0 {
tagName = "dl"
openTag = dlTag
closeTag = dlCloseTag
}
if entering {
if node.IsFootnotesList {
r.out(w, []byte("\n<div class=\"footnotes\">\n\n"))
r.out(w, tag("hr", attrs, r.Flags&UseXHTML != 0))
r.out(w, footnotesDivBytes)
r.outHRTag(w)
r.cr(w)
}
r.cr(w)
if node.Parent.Type == Item && node.Parent.Parent.Tight {
r.cr(w)
}
r.out(w, tag(tagName, attrs, false))
r.tag(w, openTag[:len(openTag)-1], attrs)
r.cr(w)
} else {
r.out(w, tag("/"+tagName, nil, false))
r.out(w, closeTag)
//cr(w)
//if node.parent.Type != Item {
// cr(w)
@ -606,16 +712,19 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.cr(w)
}
if node.IsFootnotesList {
r.out(w, []byte("\n</div>\n"))
r.out(w, footnotesCloseDivBytes)
}
}
case Item:
tagName := "li"
openTag := liTag
closeTag := liCloseTag
if node.ListFlags&ListTypeDefinition != 0 {
tagName = "dd"
openTag = ddTag
closeTag = ddCloseTag
}
if node.ListFlags&ListTypeTerm != 0 {
tagName = "dt"
openTag = dtTag
closeTag = dtCloseTag
}
if entering {
if itemOpenCR(node) {
@ -626,7 +735,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.out(w, footnoteItem(r.FootnoteAnchorPrefix, slug))
break
}
r.out(w, tag(tagName, nil, false))
r.out(w, openTag)
} else {
if node.ListData.RefLink != nil {
slug := slugify(node.ListData.RefLink)
@ -634,32 +743,34 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.out(w, footnoteReturnLink(r.FootnoteAnchorPrefix, r.FootnoteReturnLinkContents, slug))
}
}
r.out(w, tag("/"+tagName, nil, false))
r.out(w, closeTag)
r.cr(w)
}
case CodeBlock:
attrs = appendLanguageAttr(attrs, node.Info)
r.cr(w)
r.out(w, tag("pre", nil, false))
r.out(w, tag("code", attrs, false))
r.out(w, escCode(node.Literal))
r.out(w, tag("/code", nil, false))
r.out(w, tag("/pre", nil, false))
r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs)
escapeHTML(w, node.Literal)
r.out(w, codeCloseTag)
r.out(w, preCloseTag)
if node.Parent.Type != Item {
r.cr(w)
}
case Table:
if entering {
r.cr(w)
r.out(w, tag("table", nil, false))
r.out(w, tableTag)
} else {
r.out(w, tag("/table", nil, false))
r.out(w, tableCloseTag)
r.cr(w)
}
case TableCell:
tagName := "td"
openTag := tdTag
closeTag := tdCloseTag
if node.IsHeader {
tagName = "th"
openTag = thTag
closeTag = thCloseTag
}
if entering {
align := cellAlignment(node.Align)
@ -669,37 +780,37 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
if node.Prev == nil {
r.cr(w)
}
r.out(w, tag(tagName, attrs, false))
r.tag(w, openTag, attrs)
} else {
r.out(w, tag("/"+tagName, nil, false))
r.out(w, closeTag)
r.cr(w)
}
case TableHead:
if entering {
r.cr(w)
r.out(w, tag("thead", nil, false))
r.out(w, theadTag)
} else {
r.out(w, tag("/thead", nil, false))
r.out(w, theadCloseTag)
r.cr(w)
}
case TableBody:
if entering {
r.cr(w)
r.out(w, tag("tbody", nil, false))
r.out(w, tbodyTag)
// XXX: this is to adhere to a rather silly test. Should fix test.
if node.FirstChild == nil {
r.cr(w)
}
} else {
r.out(w, tag("/tbody", nil, false))
r.out(w, tbodyCloseTag)
r.cr(w)
}
case TableRow:
if entering {
r.cr(w)
r.out(w, tag("tr", nil, false))
r.out(w, trTag)
} else {
r.out(w, tag("/tr", nil, false))
r.out(w, trCloseTag)
r.cr(w)
}
default:
@ -725,9 +836,9 @@ func (r *HTMLRenderer) writeDocumentHeader(w *bytes.Buffer) {
w.WriteString("<head>\n")
w.WriteString(" <title>")
if r.Flags&Smartypants != 0 {
w.Write(r.sr.Process([]byte(r.Title)))
r.sr.Process(w, []byte(r.Title))
} else {
w.Write(esc([]byte(r.Title)))
escapeHTML(w, []byte(r.Title))
}
w.WriteString("</title>\n")
w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
@ -740,14 +851,14 @@ func (r *HTMLRenderer) writeDocumentHeader(w *bytes.Buffer) {
w.WriteString(">\n")
if r.CSS != "" {
w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
w.Write(esc([]byte(r.CSS)))
escapeHTML(w, []byte(r.CSS))
w.WriteString("\"")
w.WriteString(ending)
w.WriteString(">\n")
}
if r.Icon != "" {
w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"")
w.Write(esc([]byte(r.Icon)))
escapeHTML(w, []byte(r.Icon))
w.WriteString("\"")
w.WriteString(ending)
w.WriteString(">\n")
@ -807,6 +918,7 @@ func (r *HTMLRenderer) writeTOC(w *bytes.Buffer, ast *Node) {
w.Write(buf.Bytes())
w.WriteString("\n\n</nav>\n")
}
r.lastOutputLen = buf.Len()
}
func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {
@ -820,17 +932,17 @@ func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {
func (r *HTMLRenderer) Render(ast *Node) []byte {
//println("render_Blackfriday")
//dump(ast)
var buff bytes.Buffer
r.writeDocumentHeader(&buff)
var buf bytes.Buffer
r.writeDocumentHeader(&buf)
if r.Flags&TOC != 0 || r.Flags&OmitContents != 0 {
r.writeTOC(&buff, ast)
r.writeTOC(&buf, ast)
if r.Flags&OmitContents != 0 {
return buff.Bytes()
return buf.Bytes()
}
}
ast.Walk(func(node *Node, entering bool) WalkStatus {
return r.RenderNode(&buff, node, entering)
return r.RenderNode(&buf, node, entering)
})
r.writeDocumentFooter(&buff)
return buff.Bytes()
r.writeDocumentFooter(&buf)
return buf.Bytes()
}


@ -33,51 +33,38 @@ var (
// offset is the number of valid chars before the current cursor
func (p *parser) inline(currBlock *Node, data []byte) {
// this is called recursively: enforce a maximum depth
if p.nesting >= p.maxNesting {
// handlers might call us recursively: enforce a maximum depth
if p.nesting >= p.maxNesting || len(data) == 0 {
return
}
p.nesting++
i, end := 0, 0
for i < len(data) {
// Stop at EOL
if data[i] == '\n' && i+1 == len(data) {
break
}
for ; end < len(data); end++ {
if p.inlineCallback[data[end]] != nil {
break
}
}
if end >= len(data) {
if data[end-1] == '\n' {
currBlock.AppendChild(text(data[i : end-1]))
} else {
currBlock.AppendChild(text(data[i:end]))
}
break
}
// call the trigger
beg, end := 0, 0
for end < len(data) {
handler := p.inlineCallback[data[end]]
if handler != nil {
if consumed, node := handler(p, data, end); consumed == 0 {
// No action from the callback.
end++
} else {
// Copy inactive chars into the output.
currBlock.AppendChild(text(data[i:end]))
currBlock.AppendChild(text(data[beg:end]))
if node != nil {
currBlock.AppendChild(node)
}
// Skip past whatever the callback used.
i = end + consumed
end = i
beg = end + consumed
end = beg
}
} else {
end++
}
}
if beg < len(data) {
if data[end-1] == '\n' {
end--
}
currBlock.AppendChild(text(data[beg:end]))
}
p.nesting--
}
@ -733,25 +720,45 @@ func linkEndsWithEntity(data []byte, linkEnd int) bool {
return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
}
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
if len(s) < len(prefix) {
return false
}
delta := byte('a' - 'A')
for i, b := range prefix {
if b != s[i] && b != s[i]+delta {
return false
}
}
return true
}
var protocolPrefixes = [][]byte{
[]byte("http://"),
[]byte("https://"),
[]byte("ftp://"),
[]byte("file://"),
[]byte("mailto:"),
}
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
func maybeAutoLink(p *parser, data []byte, offset int) (int, *Node) {
// quick check to rule out most false hits
if p.insideLink || len(data) < offset+6 { // 6 is the len() of the shortest prefix below
if p.insideLink || len(data) < offset+shortestPrefix {
return 0, nil
}
prefixes := []string{
"http://",
"https://",
"ftp://",
"file://",
"mailto:",
}
for _, prefix := range prefixes {
for _, prefix := range protocolPrefixes {
endOfHead := offset + 8 // 8 is the len() of the longest prefix
if endOfHead > len(data) {
endOfHead = len(data)
}
head := bytes.ToLower(data[offset:endOfHead])
if bytes.HasPrefix(head, []byte(prefix)) {
if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
return autoLink(p, data, offset)
}
}
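
Since hasPrefixCaseInsensitive is deliberately ASCII-only, a small test sketch (not part of this commit, written as a hypothetical inline test) spells out the cases it is meant to cover:

	func TestHasPrefixCaseInsensitive(t *testing.T) {
		cases := []struct {
			s, prefix string
			want      bool
		}{
			{"http://example.com", "http://", true},
			{"HTTP://example.com", "http://", true},
			{"MailTo:bob@example.com", "mailto:", true},
			{"ftp:/example.com", "ftp://", false},
			{"ht", "http://", false},
		}
		for _, c := range cases {
			got := hasPrefixCaseInsensitive([]byte(c.s), []byte(c.prefix))
			if got != c.want {
				t.Errorf("hasPrefixCaseInsensitive(%q, %q) = %v, want %v", c.s, c.prefix, got, c.want)
			}
		}
	}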


@ -1133,7 +1133,7 @@ func TestUseXHTML(t *testing.T) {
func TestSkipHTML(t *testing.T) {
doTestsParam(t, []string{
"<div class=\"foo\"></div>\n\ntext\n\n<form>the form</form>",
"<p>text</p>\n",
"<p>text</p>\n\n<p>the form</p>\n",
"text <em>inline html</em> more text",
"<p>text inline html more text</p>\n",


@ -385,7 +385,7 @@ func Parse(input []byte, opts Options) *Node {
p.notes = make([]*reference, 0)
}
p.block(preprocess(p, input))
p.block(input)
// Walk the tree and finish up some of unfinished blocks
for p.tip != nil {
p.finalize(p.tip)
@ -441,63 +441,6 @@ func (p *parser) parseRefsToAST() {
})
}
// preprocess does a preparatory first pass over the input:
// - normalize newlines
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func preprocess(p *parser, input []byte) []byte {
var out bytes.Buffer
tabSize := TabSizeDefault
if p.flags&TabSizeEight != 0 {
tabSize = TabSizeDouble
}
beg := 0
lastFencedCodeBlockEnd := 0
for beg < len(input) {
// Find end of this line, then process the line.
end := beg
for end < len(input) && input[end] != '\n' && input[end] != '\r' {
end++
}
if p.flags&FencedCode != 0 {
// track fenced code block boundaries to suppress tab expansion
// and reference extraction inside them:
if beg >= lastFencedCodeBlockEnd {
if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
lastFencedCodeBlockEnd = beg + i
}
}
}
// add the line body if present
if end > beg {
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
out.Write(input[beg:end])
} else {
expandTabs(&out, input[beg:end], tabSize)
}
}
if end < len(input) && input[end] == '\r' {
end++
}
if end < len(input) && input[end] == '\n' {
end++
}
out.WriteByte('\n')
beg = end
}
// empty input?
if out.Len() == 0 {
out.WriteByte('\n')
}
return out.Bytes()
}
//
// Link references
//
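
With preprocess gone, the parser takes raw input directly; the panic removed at the top of block.go and the tab handling added to codePrefix and listItem cover what this pass used to do. A hedged test sketch, not part of this commit, using only the Parse, Walk, Header and GoToNext names that appear in this diff and assuming a zero-value Options is sufficient:

	func TestParseWithoutTrailingNewline(t *testing.T) {
		// the input deliberately lacks a terminating '\n'
		ast := Parse([]byte("# heading with no trailing newline"), Options{})
		var sawHeader bool
		ast.Walk(func(node *Node, entering bool) WalkStatus {
			if entering && node.Type == Header {
				sawHeader = true
			}
			return GoToNext
		})
		if !sawHeader {
			t.Error("expected a Header node in the parsed tree")
		}
	}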


@ -17,6 +17,7 @@ package blackfriday
import (
"bytes"
"io"
)
// SPRenderer is a struct containing state of a Smartypants renderer.
@ -401,13 +402,12 @@ func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
}
// Process is the entry point of the Smartypants renderer.
func (r *SPRenderer) Process(text []byte) []byte {
var buff bytes.Buffer
func (r *SPRenderer) Process(w io.Writer, text []byte) {
mark := 0
for i := 0; i < len(text); i++ {
if action := r.callbacks[text[i]]; action != nil {
if i > mark {
buff.Write(text[mark:i])
w.Write(text[mark:i])
}
previousChar := byte(0)
if i > 0 {
@ -415,12 +415,11 @@ func (r *SPRenderer) Process(text []byte) []byte {
}
var tmp bytes.Buffer
i += action(&tmp, previousChar, text[i:])
buff.Write(tmp.Bytes())
w.Write(tmp.Bytes())
mark = i + 1
}
}
if mark < len(text) {
buff.Write(text[mark:])
w.Write(text[mark:])
}
return buff.Bytes()
}
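
A small usage sketch of the new streaming signature, not part of this commit; it assumes the Smartypants HTMLFlag referenced elsewhere in this diff is all the constructor needs for basic quote handling, and that it sits inside the package with bytes imported:

	func smartypantsDemo() string {
		var out bytes.Buffer
		sr := NewSmartypantsRenderer(Smartypants)
		sr.Process(&out, []byte(`"smart quotes" everywhere`))
		return out.String() // straight double quotes come back as &ldquo; / &rdquo; entities
	}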