You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
254 lines
7.6 KiB
254 lines
7.6 KiB
8 years ago
|
// Copyright (c) 2014, David Kitchen <david@buro9.com>
|
||
|
//
|
||
|
// All rights reserved.
|
||
|
//
|
||
|
// Redistribution and use in source and binary forms, with or without
|
||
|
// modification, are permitted provided that the following conditions are met:
|
||
|
//
|
||
|
// * Redistributions of source code must retain the above copyright notice, this
|
||
|
// list of conditions and the following disclaimer.
|
||
|
//
|
||
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
||
|
// this list of conditions and the following disclaimer in the documentation
|
||
|
// and/or other materials provided with the distribution.
|
||
|
//
|
||
|
// * Neither the name of the organisation (Microcosm) nor the names of its
|
||
|
// contributors may be used to endorse or promote products derived from
|
||
|
// this software without specific prior written permission.
|
||
|
//
|
||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
|
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||
|
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
|
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||
|
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||
|
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||
|
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
package bluemonday
|
||
|
|
||
|
import (
|
||
|
"regexp"
|
||
|
)
|
||
|
|
||
|
// StrictPolicy returns an empty policy, which will effectively strip all HTML
|
||
|
// elements and their attributes from a document.
|
||
|
func StrictPolicy() *Policy {
|
||
|
return NewPolicy()
|
||
|
}
|
||
|
|
||
|
// StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
|
||
|
func StripTagsPolicy() *Policy {
|
||
|
return StrictPolicy()
|
||
|
}
|
||
|
|
||
|
// UGCPolicy returns a policy aimed at user generated content that is a result
|
||
|
// of HTML WYSIWYG tools and Markdown conversions.
|
||
|
//
|
||
|
// This is expected to be a fairly rich document where as much markup as
|
||
|
// possible should be retained. Markdown permits raw HTML so we are basically
|
||
|
// providing a policy to sanitise HTML5 documents safely but with the
|
||
|
// least intrusion on the formatting expectations of the user.
|
||
|
func UGCPolicy() *Policy {
|
||
|
|
||
|
p := NewPolicy()
|
||
|
|
||
|
///////////////////////
|
||
|
// Global attributes //
|
||
|
///////////////////////
|
||
|
|
||
|
// "class" is not permitted as we are not allowing users to style their own
|
||
|
// content
|
||
|
|
||
|
p.AllowStandardAttributes()
|
||
|
|
||
|
//////////////////////////////
|
||
|
// Global URL format policy //
|
||
|
//////////////////////////////
|
||
|
|
||
|
p.AllowStandardURLs()
|
||
|
|
||
|
////////////////////////////////
|
||
|
// Declarations and structure //
|
||
|
////////////////////////////////
|
||
|
|
||
|
// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
|
||
|
// expecting user generated content to be a fragment of HTML and not a full
|
||
|
// document.
|
||
|
|
||
|
//////////////////////////
|
||
|
// Sectioning root tags //
|
||
|
//////////////////////////
|
||
|
|
||
|
// "article" and "aside" are permitted and takes no attributes
|
||
|
p.AllowElements("article", "aside")
|
||
|
|
||
|
// "body" is not permitted as we are expecting user generated content to be a fragment
|
||
|
// of HTML and not a full document.
|
||
|
|
||
|
// "details" is permitted, including the "open" attribute which can either
|
||
|
// be blank or the value "open".
|
||
|
p.AllowAttrs(
|
||
|
"open",
|
||
|
).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
|
||
|
|
||
|
// "fieldset" is not permitted as we are not allowing forms to be created.
|
||
|
|
||
|
// "figure" is permitted and takes no attributes
|
||
|
p.AllowElements("figure")
|
||
|
|
||
|
// "nav" is not permitted as it is assumed that the site (and not the user)
|
||
|
// has defined navigation elements
|
||
|
|
||
|
// "section" is permitted and takes no attributes
|
||
|
p.AllowElements("section")
|
||
|
|
||
|
// "summary" is permitted and takes no attributes
|
||
|
p.AllowElements("summary")
|
||
|
|
||
|
//////////////////////////
|
||
|
// Headings and footers //
|
||
|
//////////////////////////
|
||
|
|
||
|
// "footer" is not permitted as we expect user content to be a fragment and
|
||
|
// not structural to this extent
|
||
|
|
||
|
// "h1" through "h6" are permitted and take no attributes
|
||
|
p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
|
||
|
|
||
|
// "header" is not permitted as we expect user content to be a fragment and
|
||
|
// not structural to this extent
|
||
|
|
||
|
// "hgroup" is permitted and takes no attributes
|
||
|
p.AllowElements("hgroup")
|
||
|
|
||
|
/////////////////////////////////////
|
||
|
// Content grouping and separating //
|
||
|
/////////////////////////////////////
|
||
|
|
||
|
// "blockquote" is permitted, including the "cite" attribute which must be
|
||
|
// a standard URL.
|
||
|
p.AllowAttrs("cite").OnElements("blockquote")
|
||
|
|
||
|
// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
|
||
|
p.AllowElements("br", "div", "hr", "p", "span", "wbr")
|
||
|
|
||
|
///////////
|
||
|
// Links //
|
||
|
///////////
|
||
|
|
||
|
// "a" is permitted
|
||
|
p.AllowAttrs("href").OnElements("a")
|
||
|
|
||
|
// "area" is permitted along with the attributes that map image maps work
|
||
|
p.AllowAttrs("name").Matching(
|
||
|
regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
|
||
|
).OnElements("map")
|
||
|
p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
|
||
|
p.AllowAttrs("coords").Matching(
|
||
|
regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
|
||
|
).OnElements("area")
|
||
|
p.AllowAttrs("href").OnElements("area")
|
||
|
p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
|
||
|
p.AllowAttrs("shape").Matching(
|
||
|
regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
|
||
|
).OnElements("area")
|
||
|
p.AllowAttrs("usemap").Matching(
|
||
|
regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
|
||
|
).OnElements("img")
|
||
|
|
||
|
// "link" is not permitted
|
||
|
|
||
|
/////////////////////
|
||
|
// Phrase elements //
|
||
|
/////////////////////
|
||
|
|
||
|
// The following are all inline phrasing elements
|
||
|
p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
|
||
|
"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
|
||
|
|
||
|
// "q" is permitted and "cite" is a URL and handled by URL policies
|
||
|
p.AllowAttrs("cite").OnElements("q")
|
||
|
|
||
|
// "time" is permitted
|
||
|
p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
|
||
|
|
||
|
////////////////////
|
||
|
// Style elements //
|
||
|
////////////////////
|
||
|
|
||
|
// block and inline elements that impart no semantic meaning but style the
|
||
|
// document
|
||
|
p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
|
||
|
|
||
|
// "style" is not permitted as we are not yet sanitising CSS and it is an
|
||
|
// XSS attack vector
|
||
|
|
||
|
//////////////////////
|
||
|
// HTML5 Formatting //
|
||
|
//////////////////////
|
||
|
|
||
|
// "bdi" "bdo" are permitted
|
||
|
p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
|
||
|
|
||
|
// "rp" "rt" "ruby" are permitted
|
||
|
p.AllowElements("rp", "rt", "ruby")
|
||
|
|
||
|
///////////////////////////
|
||
|
// HTML5 Change tracking //
|
||
|
///////////////////////////
|
||
|
|
||
|
// "del" "ins" are permitted
|
||
|
p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
|
||
|
p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
|
||
|
|
||
|
///////////
|
||
|
// Lists //
|
||
|
///////////
|
||
|
|
||
|
p.AllowLists()
|
||
|
|
||
|
////////////
|
||
|
// Tables //
|
||
|
////////////
|
||
|
|
||
|
p.AllowTables()
|
||
|
|
||
|
///////////
|
||
|
// Forms //
|
||
|
///////////
|
||
|
|
||
|
// By and large, forms are not permitted. However there are some form
|
||
|
// elements that can be used to present data, and we do permit those
|
||
|
//
|
||
|
// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
|
||
|
// "textarea" "optgroup" "option" are all not permitted
|
||
|
|
||
|
// "meter" is permitted
|
||
|
p.AllowAttrs(
|
||
|
"value",
|
||
|
"min",
|
||
|
"max",
|
||
|
"low",
|
||
|
"high",
|
||
|
"optimum",
|
||
|
).Matching(Number).OnElements("meter")
|
||
|
|
||
|
// "progress" is permitted
|
||
|
p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
|
||
|
|
||
|
//////////////////////
|
||
|
// Embedded content //
|
||
|
//////////////////////
|
||
|
|
||
|
// Vast majority not permitted
|
||
|
// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
|
||
|
// "video" are all not permitted
|
||
|
|
||
|
p.AllowImages()
|
||
|
|
||
|
return p
|
||
|
}
|