Merge pull request #280 from russross/fix-279-and-refactor

Fix fenced code block rendering with content resembling references.
2024-12-02 09:27:49 +01:00 · 2016-07-15 22:37:58 +03:00 · 2016-07-15 22:37:58 +03:00 · 4e6f303e8d
commit 4e6f303e8d
parent 1d6b8e9301 a5812bb8f2
6 changed files with 229 additions and 85 deletions
--- a/block.go
+++ b/block.go
@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) {
 		// }
 		// ```
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if i := p.fencedCode(out, data, true); i > 0 {
+			if i := p.fencedCodeBlock(out, data, true); i > 0 {
 				data = data[i:]
 				continue
 			}
@ -515,7 +515,7 @@ func (p *parser) htmlFindEnd(tag string, data []byte) int {
 	return i + skip
 }

-func (p *parser) isEmpty(data []byte) int {
+func (*parser) isEmpty(data []byte) int {
 	// it is okay to call isEmpty on an empty buffer
 	if len(data) == 0 {
 		return 0
@ -530,7 +530,7 @@ func (p *parser) isEmpty(data []byte) int {
 	return i + 1
 }

-func (p *parser) isHRule(data []byte) bool {
+func (*parser) isHRule(data []byte) bool {
 	i := 0

 	// skip up to three spaces
@ -559,21 +559,24 @@ func (p *parser) isHRule(data []byte) bool {
 	return n >= 3
 }

-func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
+// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
+// and returns the end index if so, or 0 otherwise. It also returns the marker found.
+// If syntax is not nil, it gets set to the syntax specified in the fence line.
+// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
+func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 	i, size := 0, 0
-	skip = 0

 	// skip up to three spaces
 	for i < len(data) && i < 3 && data[i] == ' ' {
 		i++
 	}
-	if i >= len(data) {
-		return
-	}

 	// check for the marker characters: ~ or `
+	if i >= len(data) {
+		return 0, ""
+	}
 	if data[i] != '~' && data[i] != '`' {
-		return
+		return 0, ""
 	}

 	c := data[i]
@ -584,27 +587,28 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 		i++
 	}

-	if i >= len(data) {
-		return
-	}
-
 	// the marker char must occur at least 3 times
 	if size < 3 {
-		return
+		return 0, ""
 	}
 	marker = string(data[i-size : i])

 	// if this is the end marker, it must match the beginning marker
 	if oldmarker != "" && marker != oldmarker {
-		return
+		return 0, ""
 	}

+	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
+	// into one, always get the syntax, and discard it if the caller doesn't care.
 	if syntax != nil {
 		syn := 0
 		i = skipChar(data, i, ' ')

 		if i >= len(data) {
-			return
+			if newlineOptional && i == len(data) {
+				return i, marker
+			}
+			return 0, ""
 		}

 		syntaxStart := i
@ -619,7 +623,7 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 			}

 			if i >= len(data) || data[i] != '}' {
-				return
+				return 0, ""
 			}

 			// strip all whitespace at the beginning and the end
@ -641,22 +645,26 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 			}
 		}

-		language := string(data[syntaxStart : syntaxStart+syn])
-		*syntax = &language
+		*syntax = string(data[syntaxStart : syntaxStart+syn])
 	}

 	i = skipChar(data, i, ' ')
 	if i >= len(data) || data[i] != '\n' {
-		return
+		if newlineOptional && i == len(data) {
+			return i, marker
+		}
+		return 0, ""
 	}

-	skip = i + 1
-	return
+	return i + 1, marker // Take newline into account.
 }

-func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
-	var lang *string
-	beg, marker := p.isFencedCode(data, &lang, "")
+// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
+// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
+// If doRender is true, a final newline is mandatory to recognize the fenced code block.
+func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
+	var syntax string
+	beg, marker := isFenceLine(data, &syntax, "", true)
 	if beg == 0 || beg >= len(data) {
 		return 0
 	}
@ -667,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		// safe to assume beg < len(data)

 		// check for the end of the code block
-		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
+		newlineOptional := !doRender
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@ -688,11 +697,6 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		beg = end
 	}

-	syntax := ""
-	if lang != nil {
-		syntax = *lang
-	}
-
 	if doRender {
 		p.r.BlockCode(out, work.Bytes(), syntax)
 	}
@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 		// irregardless of any contents inside it
 		for data[end] != '\n' {
 			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				if i := p.fencedCode(out, data[end:], false); i > 0 {
+				if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
 					// -1 to compensate for the extra end++ after the loop:
 					end += i - 1
 					break
@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {

 		// if there's a fenced code block, paragraph is over
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if p.fencedCode(out, current, false) > 0 {
+			if p.fencedCodeBlock(out, current, false) > 0 {
 				p.renderParagraph(out, data[:i])
 				return i
 			}
--- a/block_test.go
+++ b/block_test.go
@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) {

 		"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
 		"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
+
+		"```\n[]:()\n```\n",
+		"<pre><code>[]:()\n</code></pre>\n",
+
+		"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
+		"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
 	}
 	doTestsBlock(t, tests, EXTENSION_FENCED_CODE)
 }
@ -1636,3 +1642,68 @@ func TestCDATA(t *testing.T) {
 `,
 	}, EXTENSION_FENCED_CODE)
 }
+
+func TestIsFenceLine(t *testing.T) {
+	tests := []struct {
+		data            []byte
+		syntaxRequested bool
+		newlineOptional bool
+		wantEnd         int
+		wantMarker      string
+		wantSyntax      string
+	}{
+		{
+			data:    []byte("```"),
+			wantEnd: 0,
+		},
+		{
+			data:       []byte("```\nstuff here\n"),
+			wantEnd:    4,
+			wantMarker: "```",
+		},
+		{
+			data:    []byte("stuff here\n```\n"),
+			wantEnd: 0,
+		},
+		{
+			data:            []byte("```"),
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("```"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("``` go"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         6,
+			wantMarker:      "```",
+			wantSyntax:      "go",
+		},
+	}
+
+	for _, test := range tests {
+		var syntax *string
+		if test.syntaxRequested {
+			syntax = new(string)
+		}
+		end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
+		if got, want := end, test.wantEnd; got != want {
+			t.Errorf("got end %v, want %v", got, want)
+		}
+		if got, want := marker, test.wantMarker; got != want {
+			t.Errorf("got marker %q, want %q", got, want)
+		}
+		if test.syntaxRequested {
+			if got, want := *syntax, test.wantSyntax; got != want {
+				t.Errorf("got syntax %q, want %q", got, want)
+			}
+		}
+	}
+}
--- a/inline_test.go
+++ b/inline_test.go
@ -59,13 +59,11 @@ func doTestsInlineParam(t *testing.T, tests []string, opts Options, htmlFlags in
 	params HtmlRendererParameters) {
 	// catch and report panics
 	var candidate string
-	/*
-		defer func() {
-			if err := recover(); err != nil {
-				t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err)
-			}
-		}()
-	*/
+	defer func() {
+		if err := recover(); err != nil {
+			t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err)
+		}
+	}()

 	for i := 0; i+1 < len(tests); i += 2 {
 		input := tests[i]
@ -722,10 +720,6 @@ func TestReferenceLink(t *testing.T) {

 		"[link][ref]\n   [ref]: /url/",
 		"<p><a href=\"/url/\">link</a></p>\n",
-
-		// Issue 172 in blackfriday
-		"[]:<",
-		"<p>[]:&lt;</p>\n",
 	}
 	doLinkTestsInline(t, tests)
 }
--- a/markdown.go
+++ b/markdown.go
@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
 }

 // first pass:
-// - extract references
-// - expand tabs
 // - normalize newlines
+// - extract references (outside of fenced code blocks)
+// - expand tabs (outside of fenced code blocks)
 // - copy everything else
 func firstPass(p *parser, input []byte) []byte {
 	var out bytes.Buffer
@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte {
 	if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
 		tabSize = TAB_SIZE_EIGHT
 	}
-	beg, end := 0, 0
+	beg := 0
 	lastFencedCodeBlockEnd := 0
-	for beg < len(input) { // iterate over lines
-		if end = isReference(p, input[beg:], tabSize); end > 0 {
-			beg += end
-		} else { // skip to the next line
-			end = beg
-			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
-				end++
-			}
-
-			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				// track fenced code block boundaries to suppress tab expansion
-				// inside them:
-				if beg >= lastFencedCodeBlockEnd {
-					if i := p.fencedCode(&out, input[beg:], false); i > 0 {
-						lastFencedCodeBlockEnd = beg + i
-					}
-				}
-			}
-
-			// add the line body if present
-			if end > beg {
-				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
-					out.Write(input[beg:end])
-				} else {
-					expandTabs(&out, input[beg:end], tabSize)
-				}
-			}
-			out.WriteByte('\n')
-
-			if end < len(input) && input[end] == '\r' {
-				end++
-			}
-			if end < len(input) && input[end] == '\n' {
-				end++
-			}
-
-			beg = end
+	for beg < len(input) {
+		// Find end of this line, then process the line.
+		end := beg
+		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
+			end++
 		}
+
+		if p.flags&EXTENSION_FENCED_CODE != 0 {
+			// track fenced code block boundaries to suppress tab expansion
+			// and reference extraction inside them:
+			if beg >= lastFencedCodeBlockEnd {
+				if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
+					lastFencedCodeBlockEnd = beg + i
+				}
+			}
+		}
+
+		// add the line body if present
+		if end > beg {
+			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
+				out.Write(input[beg:end])
+			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
+				beg += refEnd
+				continue
+			} else {
+				expandTabs(&out, input[beg:end], tabSize)
+			}
+		}
+
+		if end < len(input) && input[end] == '\r' {
+			end++
+		}
+		if end < len(input) && input[end] == '\n' {
+			end++
+		}
+		out.WriteByte('\n')
+
+		beg = end
 	}

 	// empty input?
--- a/markdown_test.go
+++ b/markdown_test.go
@ -0,0 +1,75 @@
+//
+// Blackfriday Markdown Processor
+// Available at http://github.com/russross/blackfriday
+//
+// Copyright © 2011 Russ Ross <russ@russross.com>.
+// Distributed under the Simplified BSD License.
+// See README.md for details.
+//
+
+//
+// Unit tests for full document parsing and rendering
+//
+
+package blackfriday
+
+import (
+	"testing"
+)
+
+func runMarkdown(input string) string {
+	return string(MarkdownCommon([]byte(input)))
+}
+
+func doTests(t *testing.T, tests []string) {
+	// catch and report panics
+	var candidate string
+	defer func() {
+		if err := recover(); err != nil {
+			t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err)
+		}
+	}()
+
+	for i := 0; i+1 < len(tests); i += 2 {
+		input := tests[i]
+		candidate = input
+		expected := tests[i+1]
+		actual := runMarkdown(candidate)
+		if actual != expected {
+			t.Errorf("\nInput   [%#v]\nExpected[%#v]\nActual  [%#v]",
+				candidate, expected, actual)
+		}
+
+		// now test every substring to stress test bounds checking
+		if !testing.Short() {
+			for start := 0; start < len(input); start++ {
+				for end := start + 1; end <= len(input); end++ {
+					candidate = input[start:end]
+					_ = runMarkdown(candidate)
+				}
+			}
+		}
+	}
+}
+
+func TestDocument(t *testing.T) {
+	var tests = []string{
+		// Empty document.
+		"",
+		"",
+
+		" ",
+		"",
+
+		// This shouldn't panic.
+		// https://github.com/russross/blackfriday/issues/172
+		"[]:<",
+		"<p>[]:&lt;</p>\n",
+
+		// This shouldn't panic.
+		// https://github.com/russross/blackfriday/issues/173
+		"   [",
+		"<p>[</p>\n",
+	}
+	doTests(t, tests)
+}
--- a/ref_test.go
+++ b/ref_test.go
@ -29,7 +29,7 @@ func doTestsReference(t *testing.T, files []string, flag int) {
 	var candidate string
 	defer func() {
 		if err := recover(); err != nil {
-			t.Errorf("\npanic while processing [%#v]\n", candidate)
+			t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err)
 		}
 	}()