You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							253 lines
						
					
					
						
							7.6 KiB
						
					
					
				
			
		
		
	
	
							253 lines
						
					
					
						
							7.6 KiB
						
					
					
				| // Copyright (c) 2014, David Kitchen <david@buro9.com>
 | |
| //
 | |
| // All rights reserved.
 | |
| //
 | |
| // Redistribution and use in source and binary forms, with or without
 | |
| // modification, are permitted provided that the following conditions are met:
 | |
| //
 | |
| // * Redistributions of source code must retain the above copyright notice, this
 | |
| //   list of conditions and the following disclaimer.
 | |
| //
 | |
| // * Redistributions in binary form must reproduce the above copyright notice,
 | |
| //   this list of conditions and the following disclaimer in the documentation
 | |
| //   and/or other materials provided with the distribution.
 | |
| //
 | |
| // * Neither the name of the organisation (Microcosm) nor the names of its
 | |
| //   contributors may be used to endorse or promote products derived from
 | |
| //   this software without specific prior written permission.
 | |
| //
 | |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 | |
| // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 | |
| // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | |
| // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | |
| // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 | |
| // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 | |
| // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| 
 | |
| package bluemonday
 | |
| 
 | |
| import (
 | |
| 	"regexp"
 | |
| )
 | |
| 
 | |
| // StrictPolicy returns an empty policy, which will effectively strip all HTML
 | |
| // elements and their attributes from a document.
 | |
| func StrictPolicy() *Policy {
 | |
| 	return NewPolicy()
 | |
| }
 | |
| 
 | |
| // StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
 | |
| func StripTagsPolicy() *Policy {
 | |
| 	return StrictPolicy()
 | |
| }
 | |
| 
 | |
| // UGCPolicy returns a policy aimed at user generated content that is a result
 | |
| // of HTML WYSIWYG tools and Markdown conversions.
 | |
| //
 | |
| // This is expected to be a fairly rich document where as much markup as
 | |
| // possible should be retained. Markdown permits raw HTML so we are basically
 | |
| // providing a policy to sanitise HTML5 documents safely but with the
 | |
| // least intrusion on the formatting expectations of the user.
 | |
| func UGCPolicy() *Policy {
 | |
| 
 | |
| 	p := NewPolicy()
 | |
| 
 | |
| 	///////////////////////
 | |
| 	// Global attributes //
 | |
| 	///////////////////////
 | |
| 
 | |
| 	// "class" is not permitted as we are not allowing users to style their own
 | |
| 	// content
 | |
| 
 | |
| 	p.AllowStandardAttributes()
 | |
| 
 | |
| 	//////////////////////////////
 | |
| 	// Global URL format policy //
 | |
| 	//////////////////////////////
 | |
| 
 | |
| 	p.AllowStandardURLs()
 | |
| 
 | |
| 	////////////////////////////////
 | |
| 	// Declarations and structure //
 | |
| 	////////////////////////////////
 | |
| 
 | |
| 	// "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
 | |
| 	// expecting user generated content to be a fragment of HTML and not a full
 | |
| 	// document.
 | |
| 
 | |
| 	//////////////////////////
 | |
| 	// Sectioning root tags //
 | |
| 	//////////////////////////
 | |
| 
 | |
| 	// "article" and "aside" are permitted and takes no attributes
 | |
| 	p.AllowElements("article", "aside")
 | |
| 
 | |
| 	// "body" is not permitted as we are expecting user generated content to be a fragment
 | |
| 	// of HTML and not a full document.
 | |
| 
 | |
| 	// "details" is permitted, including the "open" attribute which can either
 | |
| 	// be blank or the value "open".
 | |
| 	p.AllowAttrs(
 | |
| 		"open",
 | |
| 	).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
 | |
| 
 | |
| 	// "fieldset" is not permitted as we are not allowing forms to be created.
 | |
| 
 | |
| 	// "figure" is permitted and takes no attributes
 | |
| 	p.AllowElements("figure")
 | |
| 
 | |
| 	// "nav" is not permitted as it is assumed that the site (and not the user)
 | |
| 	// has defined navigation elements
 | |
| 
 | |
| 	// "section" is permitted and takes no attributes
 | |
| 	p.AllowElements("section")
 | |
| 
 | |
| 	// "summary" is permitted and takes no attributes
 | |
| 	p.AllowElements("summary")
 | |
| 
 | |
| 	//////////////////////////
 | |
| 	// Headings and footers //
 | |
| 	//////////////////////////
 | |
| 
 | |
| 	// "footer" is not permitted as we expect user content to be a fragment and
 | |
| 	// not structural to this extent
 | |
| 
 | |
| 	// "h1" through "h6" are permitted and take no attributes
 | |
| 	p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
 | |
| 
 | |
| 	// "header" is not permitted as we expect user content to be a fragment and
 | |
| 	// not structural to this extent
 | |
| 
 | |
| 	// "hgroup" is permitted and takes no attributes
 | |
| 	p.AllowElements("hgroup")
 | |
| 
 | |
| 	/////////////////////////////////////
 | |
| 	// Content grouping and separating //
 | |
| 	/////////////////////////////////////
 | |
| 
 | |
| 	// "blockquote" is permitted, including the "cite" attribute which must be
 | |
| 	// a standard URL.
 | |
| 	p.AllowAttrs("cite").OnElements("blockquote")
 | |
| 
 | |
| 	// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
 | |
| 	p.AllowElements("br", "div", "hr", "p", "span", "wbr")
 | |
| 
 | |
| 	///////////
 | |
| 	// Links //
 | |
| 	///////////
 | |
| 
 | |
| 	// "a" is permitted
 | |
| 	p.AllowAttrs("href").OnElements("a")
 | |
| 
 | |
| 	// "area" is permitted along with the attributes that map image maps work
 | |
| 	p.AllowAttrs("name").Matching(
 | |
| 		regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
 | |
| 	).OnElements("map")
 | |
| 	p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
 | |
| 	p.AllowAttrs("coords").Matching(
 | |
| 		regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
 | |
| 	).OnElements("area")
 | |
| 	p.AllowAttrs("href").OnElements("area")
 | |
| 	p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
 | |
| 	p.AllowAttrs("shape").Matching(
 | |
| 		regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
 | |
| 	).OnElements("area")
 | |
| 	p.AllowAttrs("usemap").Matching(
 | |
| 		regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
 | |
| 	).OnElements("img")
 | |
| 
 | |
| 	// "link" is not permitted
 | |
| 
 | |
| 	/////////////////////
 | |
| 	// Phrase elements //
 | |
| 	/////////////////////
 | |
| 
 | |
| 	// The following are all inline phrasing elements
 | |
| 	p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
 | |
| 		"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
 | |
| 
 | |
| 	// "q" is permitted and "cite" is a URL and handled by URL policies
 | |
| 	p.AllowAttrs("cite").OnElements("q")
 | |
| 
 | |
| 	// "time" is permitted
 | |
| 	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
 | |
| 
 | |
| 	////////////////////
 | |
| 	// Style elements //
 | |
| 	////////////////////
 | |
| 
 | |
| 	// block and inline elements that impart no semantic meaning but style the
 | |
| 	// document
 | |
| 	p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
 | |
| 
 | |
| 	// "style" is not permitted as we are not yet sanitising CSS and it is an
 | |
| 	// XSS attack vector
 | |
| 
 | |
| 	//////////////////////
 | |
| 	// HTML5 Formatting //
 | |
| 	//////////////////////
 | |
| 
 | |
| 	// "bdi" "bdo" are permitted
 | |
| 	p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
 | |
| 
 | |
| 	// "rp" "rt" "ruby" are permitted
 | |
| 	p.AllowElements("rp", "rt", "ruby")
 | |
| 
 | |
| 	///////////////////////////
 | |
| 	// HTML5 Change tracking //
 | |
| 	///////////////////////////
 | |
| 
 | |
| 	// "del" "ins" are permitted
 | |
| 	p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
 | |
| 	p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
 | |
| 
 | |
| 	///////////
 | |
| 	// Lists //
 | |
| 	///////////
 | |
| 
 | |
| 	p.AllowLists()
 | |
| 
 | |
| 	////////////
 | |
| 	// Tables //
 | |
| 	////////////
 | |
| 
 | |
| 	p.AllowTables()
 | |
| 
 | |
| 	///////////
 | |
| 	// Forms //
 | |
| 	///////////
 | |
| 
 | |
| 	// By and large, forms are not permitted. However there are some form
 | |
| 	// elements that can be used to present data, and we do permit those
 | |
| 	//
 | |
| 	// "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
 | |
| 	// "textarea" "optgroup" "option" are all not permitted
 | |
| 
 | |
| 	// "meter" is permitted
 | |
| 	p.AllowAttrs(
 | |
| 		"value",
 | |
| 		"min",
 | |
| 		"max",
 | |
| 		"low",
 | |
| 		"high",
 | |
| 		"optimum",
 | |
| 	).Matching(Number).OnElements("meter")
 | |
| 
 | |
| 	// "progress" is permitted
 | |
| 	p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
 | |
| 
 | |
| 	//////////////////////
 | |
| 	// Embedded content //
 | |
| 	//////////////////////
 | |
| 
 | |
| 	// Vast majority not permitted
 | |
| 	// "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
 | |
| 	// "video" are all not permitted
 | |
| 
 | |
| 	p.AllowImages()
 | |
| 
 | |
| 	return p
 | |
| }
 | |
| 
 |