mirror of
https://github.com/danog/blackfriday.git
synced 2024-11-26 20:14:43 +01:00
Rewrite protection against JavaScript injection
This drops the naive approach to <script> tag stripping and instead fully sanitizes the HTML. The general idea (and the regexps) is taken from Stack Exchange's PageDown JavaScript Markdown processor[1]. As in PageDown, it is implemented as a separate pass over the resulting HTML. Includes a metric ton (but not all) of the test cases from the OWASP XSS filter evasion cheat sheet[2]. Several are commented out since they don't pass yet. Stronger (but still incomplete) fix for #11. [1] http://code.google.com/p/pagedown/wiki/PageDown [2] https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
This commit is contained in:
parent
e02c392dc6
commit
55cd82008e
102
html.go
102
html.go
@ -18,6 +18,7 @@ package blackfriday
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
@ -40,6 +41,41 @@ const (
|
||||
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
|
||||
)
|
||||
|
||||
// Whitelist data for the HTML sanitization pass. Each compiled pattern is
// anchored with ^ and $, so it only accepts input that is exactly one tag.
var (
	// tags lists the element names accepted as a bare opening or closing
	// tag, i.e. without any attributes.
	tags = []string{
		"b", "blockquote", "code", "del", "dd", "dl", "dt", "em",
		"h1", "h2", "h3", "h4", "h5", "h6",
		"i", "kbd", "li", "ol", "p", "pre",
		"s", "sup", "sub", "strong", "strike", "ul",
	}

	// urlRe is the pattern fragment for an acceptable href/src value: an
	// http, https or ftp URL, or a path that begins with a slash.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// tagWhitelist accepts an attribute-free opening or closing tag from
	// tags, plus <br> and <hr> with an optional space and self-closing slash.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)

	// anchorClean accepts </a>, or an <a> tag whose href matches urlRe,
	// optionally followed by a non-empty title attribute.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)

	// imgClean accepts an <img> tag whose src matches urlRe, optionally
	// followed by width, height, alt and title attributes in that order;
	// width and height are limited to one to three digits.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
|
||||
|
||||
// Html is a type that implements the Renderer interface for HTML output.
|
||||
//
|
||||
// Do not create this directly, instead use the HtmlRenderer function.
|
||||
@ -137,6 +173,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
func (options *Html) GetFlags() int {
|
||||
return options.flags
|
||||
}
|
||||
|
||||
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
|
||||
marker := out.Len()
|
||||
doubleSpace(out)
|
||||
@ -168,32 +208,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
|
||||
}
|
||||
|
||||
doubleSpace(out)
|
||||
if options.flags&HTML_SKIP_SCRIPT != 0 {
|
||||
out.Write(stripTag(string(text), "script", "p"))
|
||||
} else {
|
||||
out.Write(text)
|
||||
}
|
||||
out.WriteByte('\n')
|
||||
}
|
||||
|
||||
func stripTag(text, tag, newTag string) []byte {
|
||||
closeNewTag := fmt.Sprintf("</%s>", newTag)
|
||||
i := 0
|
||||
for i < len(text) && text[i] != '<' {
|
||||
i++
|
||||
}
|
||||
if i == len(text) {
|
||||
return []byte(text)
|
||||
}
|
||||
found, end := findHtmlTagPos([]byte(text[i:]), tag)
|
||||
closeTag := fmt.Sprintf("</%s>", tag)
|
||||
noOpen := text
|
||||
if found {
|
||||
noOpen = text[0:i+1] + newTag + text[end:]
|
||||
}
|
||||
return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
|
||||
}
|
||||
|
||||
func (options *Html) HRule(out *bytes.Buffer) {
|
||||
doubleSpace(out)
|
||||
out.WriteString("<hr")
|
||||
@ -781,6 +799,46 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
||||
return false, -1
|
||||
}
|
||||
|
||||
func sanitizeHtml(html []byte) []byte {
|
||||
var result []byte
|
||||
for string(html) != "" {
|
||||
skip, tag, rest := findHtmlTag(html)
|
||||
html = rest
|
||||
result = append(result, skip...)
|
||||
result = append(result, sanitizeTag(tag)...)
|
||||
}
|
||||
return append(result, []byte("\n")...)
|
||||
}
|
||||
|
||||
func sanitizeTag(tag []byte) []byte {
|
||||
if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
|
||||
return tag
|
||||
} else {
|
||||
return []byte("")
|
||||
}
|
||||
}
|
||||
|
||||
// skipUntilChar scans text forward from index start and returns the index of
// the first byte equal to char. If no such byte is found it returns len(text),
// or start itself when start is already at or past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
|
||||
|
||||
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
|
||||
start := skipUntilChar(html, 0, '<')
|
||||
rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
|
||||
if rightAngle > start {
|
||||
skip = html[0:start]
|
||||
tag = html[start : rightAngle+1]
|
||||
rest = html[rightAngle+1:]
|
||||
return
|
||||
}
|
||||
|
||||
return []byte(""), []byte(""), []byte("")
|
||||
}
|
||||
|
||||
func skipSpace(tag []byte, i int) int {
|
||||
for i < len(tag) && isspace(tag[i]) {
|
||||
i++
|
||||
|
105
inline_test.go
105
inline_test.go
@ -90,16 +90,117 @@ func TestRawHtmlTag(t *testing.T) {
|
||||
"<p>alert()</p>\n",
|
||||
|
||||
"<script>alert()</script>\n",
|
||||
"<p>alert()</p>\n",
|
||||
"alert()\n",
|
||||
|
||||
"<script src='foo'></script>\n",
|
||||
"<p></p>\n",
|
||||
"\n",
|
||||
|
||||
"<script src='a>b'></script>\n",
|
||||
"\n",
|
||||
|
||||
"zz <script src='foo'></script>\n",
|
||||
"<p>zz </p>\n",
|
||||
|
||||
"zz <script src=foo></script>\n",
|
||||
"<p>zz </p>\n",
|
||||
|
||||
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
|
||||
"\n",
|
||||
|
||||
`'';!--"<XSS>=&{()}`,
|
||||
"<p>'';!--"=&{()}</p>\n",
|
||||
|
||||
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="javascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<a onmouseover="alert(document.cookie)">xss link</a>`,
|
||||
"<p>xss link</a></p>\n",
|
||||
|
||||
"<a onmouseover=alert(document.cookie)>xss link</a>",
|
||||
"<p>xss link</a></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
|
||||
//"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC=# onmouseover="alert('xxs')">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC= onmouseover="alert('xxs')">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG onmouseover="alert('xxs')">`,
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="javascriptascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="jav	ascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="  javascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||
"<p></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
|
||||
//"\n",
|
||||
|
||||
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||
"<p></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
|
||||
//"",
|
||||
|
||||
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
|
||||
"<p></p>\n",
|
||||
|
||||
"<SCRIPT SRC=//ha.ckers.org/.j>",
|
||||
"<p></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//`<IMG SRC="javascript:alert('XSS')"`,
|
||||
//"",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
|
||||
//"",
|
||||
}
|
||||
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
|
||||
}
|
||||
|
4
latex.go
4
latex.go
@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
|
||||
return &Latex{}
|
||||
}
|
||||
|
||||
func (options *Latex) GetFlags() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// render code chunks using verbatim, or listings if we have a language
|
||||
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
|
||||
if lang == "" {
|
||||
|
@ -165,6 +165,8 @@ type Renderer interface {
|
||||
// Header and footer
|
||||
DocumentHeader(out *bytes.Buffer)
|
||||
DocumentFooter(out *bytes.Buffer)
|
||||
|
||||
GetFlags() int
|
||||
}
|
||||
|
||||
// Callback functions for inline parsing. One such function is defined
|
||||
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
|
||||
first := firstPass(p, input)
|
||||
second := secondPass(p, first)
|
||||
|
||||
if renderer.GetFlags()&HTML_SKIP_SCRIPT != 0 {
|
||||
second = sanitizeHtml(second)
|
||||
}
|
||||
|
||||
return second
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user