|
|
|
@ -25,20 +25,22 @@ type parser struct { |
|
|
|
|
hasSelfClosingToken bool |
|
|
|
|
// doc is the document root element.
|
|
|
|
|
doc *Node |
|
|
|
|
// The stack of open elements (section 12.2.3.2) and active formatting
|
|
|
|
|
// elements (section 12.2.3.3).
|
|
|
|
|
// The stack of open elements (section 12.2.4.2) and active formatting
|
|
|
|
|
// elements (section 12.2.4.3).
|
|
|
|
|
oe, afe nodeStack |
|
|
|
|
// Element pointers (section 12.2.3.4).
|
|
|
|
|
// Element pointers (section 12.2.4.4).
|
|
|
|
|
head, form *Node |
|
|
|
|
// Other parsing state flags (section 12.2.3.5).
|
|
|
|
|
// Other parsing state flags (section 12.2.4.5).
|
|
|
|
|
scripting, framesetOK bool |
|
|
|
|
// The stack of template insertion modes
|
|
|
|
|
templateStack insertionModeStack |
|
|
|
|
// im is the current insertion mode.
|
|
|
|
|
im insertionMode |
|
|
|
|
// originalIM is the insertion mode to go back to after completing a text
|
|
|
|
|
// or inTableText insertion mode.
|
|
|
|
|
originalIM insertionMode |
|
|
|
|
// fosterParenting is whether new elements should be inserted according to
|
|
|
|
|
// the foster parenting rules (section 12.2.5.3).
|
|
|
|
|
// the foster parenting rules (section 12.2.6.1).
|
|
|
|
|
fosterParenting bool |
|
|
|
|
// quirks is whether the parser is operating in "quirks mode."
|
|
|
|
|
quirks bool |
|
|
|
@ -56,7 +58,7 @@ func (p *parser) top() *Node { |
|
|
|
|
return p.doc |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Stop tags for use in popUntil. These come from section 12.2.3.2.
|
|
|
|
|
// Stop tags for use in popUntil. These come from section 12.2.4.2.
|
|
|
|
|
var ( |
|
|
|
|
defaultScopeStopTags = map[string][]a.Atom{ |
|
|
|
|
"": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, |
|
|
|
@ -79,7 +81,7 @@ const ( |
|
|
|
|
|
|
|
|
|
// popUntil pops the stack of open elements at the highest element whose tag
|
|
|
|
|
// is in matchTags, provided there is no higher element in the scope's stop
|
|
|
|
|
// tags (as defined in section 12.2.3.2). It returns whether or not there was
|
|
|
|
|
// tags (as defined in section 12.2.4.2). It returns whether or not there was
|
|
|
|
|
// such an element. If there was not, popUntil leaves the stack unchanged.
|
|
|
|
|
//
|
|
|
|
|
// For example, the set of stop tags for table scope is: "html", "table". If
|
|
|
|
@ -126,7 +128,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { |
|
|
|
|
return -1 |
|
|
|
|
} |
|
|
|
|
case tableScope: |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table { |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { |
|
|
|
|
return -1 |
|
|
|
|
} |
|
|
|
|
case selectScope: |
|
|
|
@ -162,17 +164,17 @@ func (p *parser) clearStackToContext(s scope) { |
|
|
|
|
tagAtom := p.oe[i].DataAtom |
|
|
|
|
switch s { |
|
|
|
|
case tableScope: |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table { |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { |
|
|
|
|
p.oe = p.oe[:i+1] |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
|
case tableRowScope: |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tr { |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template { |
|
|
|
|
p.oe = p.oe[:i+1] |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
|
case tableBodyScope: |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead { |
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template { |
|
|
|
|
p.oe = p.oe[:i+1] |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
@ -183,7 +185,7 @@ func (p *parser) clearStackToContext(s scope) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// generateImpliedEndTags pops nodes off the stack of open elements as long as
|
|
|
|
|
// the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
|
|
|
|
|
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
|
|
|
|
|
// If exceptions are specified, nodes with that name will not be popped off.
|
|
|
|
|
func (p *parser) generateImpliedEndTags(exceptions ...string) { |
|
|
|
|
var i int |
|
|
|
@ -192,7 +194,7 @@ loop: |
|
|
|
|
n := p.oe[i] |
|
|
|
|
if n.Type == ElementNode { |
|
|
|
|
switch n.DataAtom { |
|
|
|
|
case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt: |
|
|
|
|
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: |
|
|
|
|
for _, except := range exceptions { |
|
|
|
|
if n.Data == except { |
|
|
|
|
break loop |
|
|
|
@ -234,9 +236,9 @@ func (p *parser) shouldFosterParent() bool { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// fosterParent adds a child node according to the foster parenting rules.
|
|
|
|
|
// Section 12.2.5.3, "foster parenting".
|
|
|
|
|
// Section 12.2.6.1, "foster parenting".
|
|
|
|
|
func (p *parser) fosterParent(n *Node) { |
|
|
|
|
var table, parent, prev *Node |
|
|
|
|
var table, parent, prev, template *Node |
|
|
|
|
var i int |
|
|
|
|
for i = len(p.oe) - 1; i >= 0; i-- { |
|
|
|
|
if p.oe[i].DataAtom == a.Table { |
|
|
|
@ -245,6 +247,19 @@ func (p *parser) fosterParent(n *Node) { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var j int |
|
|
|
|
for j = len(p.oe) - 1; j >= 0; j-- { |
|
|
|
|
if p.oe[j].DataAtom == a.Template { |
|
|
|
|
template = p.oe[j] |
|
|
|
|
break |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if template != nil && (table == nil || j > i) { |
|
|
|
|
template.AppendChild(n) |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if table == nil { |
|
|
|
|
// The foster parent is the html element.
|
|
|
|
|
parent = p.oe[0] |
|
|
|
@ -304,7 +319,7 @@ func (p *parser) addElement() { |
|
|
|
|
}) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.3.3.
|
|
|
|
|
// Section 12.2.4.3.
|
|
|
|
|
func (p *parser) addFormattingElement() { |
|
|
|
|
tagAtom, attr := p.tok.DataAtom, p.tok.Attr |
|
|
|
|
p.addElement() |
|
|
|
@ -351,7 +366,7 @@ findIdenticalElements: |
|
|
|
|
p.afe = append(p.afe, p.top()) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.3.3.
|
|
|
|
|
// Section 12.2.4.3.
|
|
|
|
|
func (p *parser) clearActiveFormattingElements() { |
|
|
|
|
for { |
|
|
|
|
n := p.afe.pop() |
|
|
|
@ -361,7 +376,7 @@ func (p *parser) clearActiveFormattingElements() { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.3.3.
|
|
|
|
|
// Section 12.2.4.3.
|
|
|
|
|
func (p *parser) reconstructActiveFormattingElements() { |
|
|
|
|
n := p.afe.top() |
|
|
|
|
if n == nil { |
|
|
|
@ -390,12 +405,12 @@ func (p *parser) reconstructActiveFormattingElements() { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.4.
|
|
|
|
|
// Section 12.2.5.
|
|
|
|
|
// acknowledgeSelfClosingTag clears the parser's hasSelfClosingToken flag.
// It takes no arguments and returns nothing; its only effect is the write
// to p.hasSelfClosingToken below.
func (p *parser) acknowledgeSelfClosingTag() { |
|
|
|
|
p.hasSelfClosingToken = false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// An insertion mode (section 12.2.3.1) is the state transition function from
|
|
|
|
|
// An insertion mode (section 12.2.4.1) is the state transition function from
|
|
|
|
|
// a particular state in the HTML5 parser's state machine. It updates the
|
|
|
|
|
// parser's fields depending on parser.tok (where ErrorToken means EOF).
|
|
|
|
|
// It returns whether the token was consumed.
|
|
|
|
@ -403,7 +418,7 @@ type insertionMode func(*parser) bool |
|
|
|
|
|
|
|
|
|
// setOriginalIM sets the insertion mode to return to after completing a text or
|
|
|
|
|
// inTableText insertion mode.
|
|
|
|
|
// Section 12.2.3.1, "using the rules for".
|
|
|
|
|
// Section 12.2.4.1, "using the rules for".
|
|
|
|
|
func (p *parser) setOriginalIM() { |
|
|
|
|
if p.originalIM != nil { |
|
|
|
|
panic("html: bad parser state: originalIM was set twice") |
|
|
|
@ -411,18 +426,38 @@ func (p *parser) setOriginalIM() { |
|
|
|
|
p.originalIM = p.im |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.3.1, "reset the insertion mode".
|
|
|
|
|
// Section 12.2.4.1, "reset the insertion mode".
|
|
|
|
|
func (p *parser) resetInsertionMode() { |
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- { |
|
|
|
|
n := p.oe[i] |
|
|
|
|
if i == 0 && p.context != nil { |
|
|
|
|
last := i == 0 |
|
|
|
|
if last && p.context != nil { |
|
|
|
|
n = p.context |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
switch n.DataAtom { |
|
|
|
|
case a.Select: |
|
|
|
|
if !last { |
|
|
|
|
for ancestor, first := n, p.oe[0]; ancestor != first; { |
|
|
|
|
if ancestor == first { |
|
|
|
|
break |
|
|
|
|
} |
|
|
|
|
ancestor = p.oe[p.oe.index(ancestor)-1] |
|
|
|
|
switch ancestor.DataAtom { |
|
|
|
|
case a.Template: |
|
|
|
|
p.im = inSelectIM |
|
|
|
|
return |
|
|
|
|
case a.Table: |
|
|
|
|
p.im = inSelectInTableIM |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
p.im = inSelectIM |
|
|
|
|
case a.Td, a.Th: |
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.im = inCellIM |
|
|
|
|
case a.Tr: |
|
|
|
|
p.im = inRowIM |
|
|
|
@ -434,25 +469,41 @@ func (p *parser) resetInsertionMode() { |
|
|
|
|
p.im = inColumnGroupIM |
|
|
|
|
case a.Table: |
|
|
|
|
p.im = inTableIM |
|
|
|
|
case a.Template: |
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
if n.Namespace != "" { |
|
|
|
|
continue |
|
|
|
|
} |
|
|
|
|
p.im = p.templateStack.top() |
|
|
|
|
case a.Head: |
|
|
|
|
p.im = inBodyIM |
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.im = inHeadIM |
|
|
|
|
case a.Body: |
|
|
|
|
p.im = inBodyIM |
|
|
|
|
case a.Frameset: |
|
|
|
|
p.im = inFramesetIM |
|
|
|
|
case a.Html: |
|
|
|
|
if p.head == nil { |
|
|
|
|
p.im = beforeHeadIM |
|
|
|
|
} else { |
|
|
|
|
p.im = afterHeadIM |
|
|
|
|
} |
|
|
|
|
default: |
|
|
|
|
if last { |
|
|
|
|
p.im = inBodyIM |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
|
continue |
|
|
|
|
} |
|
|
|
|
return |
|
|
|
|
} |
|
|
|
|
p.im = inBodyIM |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// whitespace is the set of bytes treated as whitespace: space, tab,
// carriage return, line feed and form feed.
const whitespace = " \t\r\n\f" |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.1.
|
|
|
|
|
// Section 12.2.6.4.1.
|
|
|
|
|
func initialIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -479,7 +530,7 @@ func initialIM(p *parser) bool { |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.2.
|
|
|
|
|
// Section 12.2.6.4.2.
|
|
|
|
|
func beforeHTMLIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case DoctypeToken: |
|
|
|
@ -517,7 +568,7 @@ func beforeHTMLIM(p *parser) bool { |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.3.
|
|
|
|
|
// Section 12.2.6.4.3.
|
|
|
|
|
func beforeHeadIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -560,7 +611,7 @@ func beforeHeadIM(p *parser) bool { |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.4.
|
|
|
|
|
// Section 12.2.6.4.4.
|
|
|
|
|
func inHeadIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -590,19 +641,41 @@ func inHeadIM(p *parser) bool { |
|
|
|
|
case a.Head: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case a.Template: |
|
|
|
|
p.addElement() |
|
|
|
|
p.afe = append(p.afe, &scopeMarker) |
|
|
|
|
p.framesetOK = false |
|
|
|
|
p.im = inTemplateIM |
|
|
|
|
p.templateStack = append(p.templateStack, inTemplateIM) |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
case EndTagToken: |
|
|
|
|
switch p.tok.DataAtom { |
|
|
|
|
case a.Head: |
|
|
|
|
n := p.oe.pop() |
|
|
|
|
if n.DataAtom != a.Head { |
|
|
|
|
panic("html: bad parser state: <head> element not found, in the in-head insertion mode") |
|
|
|
|
} |
|
|
|
|
p.oe.pop() |
|
|
|
|
p.im = afterHeadIM |
|
|
|
|
return true |
|
|
|
|
case a.Body, a.Html, a.Br: |
|
|
|
|
p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) |
|
|
|
|
return false |
|
|
|
|
case a.Template: |
|
|
|
|
if !p.oe.contains(a.Template) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.generateImpliedEndTags() |
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- { |
|
|
|
|
if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template { |
|
|
|
|
p.oe = p.oe[:i] |
|
|
|
|
break |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
p.clearActiveFormattingElements() |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.resetInsertionMode() |
|
|
|
|
return true |
|
|
|
|
default: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
@ -622,7 +695,7 @@ func inHeadIM(p *parser) bool { |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.6.
|
|
|
|
|
// Section 12.2.6.4.6.
|
|
|
|
|
func afterHeadIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -648,7 +721,7 @@ func afterHeadIM(p *parser) bool { |
|
|
|
|
p.addElement() |
|
|
|
|
p.im = inFramesetIM |
|
|
|
|
return true |
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title: |
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: |
|
|
|
|
p.oe = append(p.oe, p.head) |
|
|
|
|
defer p.oe.remove(p.head) |
|
|
|
|
return inHeadIM(p) |
|
|
|
@ -660,6 +733,8 @@ func afterHeadIM(p *parser) bool { |
|
|
|
|
switch p.tok.DataAtom { |
|
|
|
|
case a.Body, a.Html, a.Br: |
|
|
|
|
// Drop down to creating an implied <body> tag.
|
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
default: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
@ -697,7 +772,7 @@ func copyAttributes(dst *Node, src Token) { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.7.
|
|
|
|
|
// Section 12.2.6.4.7.
|
|
|
|
|
func inBodyIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -727,10 +802,16 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
case StartTagToken: |
|
|
|
|
switch p.tok.DataAtom { |
|
|
|
|
case a.Html: |
|
|
|
|
if p.oe.contains(a.Template) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
copyAttributes(p.oe[0], p.tok) |
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title: |
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
case a.Body: |
|
|
|
|
if p.oe.contains(a.Template) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
if len(p.oe) >= 2 { |
|
|
|
|
body := p.oe[1] |
|
|
|
|
if body.Type == ElementNode && body.DataAtom == a.Body { |
|
|
|
@ -767,9 +848,13 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
// The newline, if any, will be dealt with by the TextToken case.
|
|
|
|
|
p.framesetOK = false |
|
|
|
|
case a.Form: |
|
|
|
|
if p.form == nil { |
|
|
|
|
if p.form != nil && !p.oe.contains(a.Template) { |
|
|
|
|
// Ignore the token
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
p.popUntil(buttonScope, a.P) |
|
|
|
|
p.addElement() |
|
|
|
|
if !p.oe.contains(a.Template) { |
|
|
|
|
p.form = p.top() |
|
|
|
|
} |
|
|
|
|
case a.Li: |
|
|
|
@ -903,6 +988,14 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
p.acknowledgeSelfClosingTag() |
|
|
|
|
p.popUntil(buttonScope, a.P) |
|
|
|
|
p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) |
|
|
|
|
if p.form == nil { |
|
|
|
|
// NOTE: The 'isindex' element has been removed,
|
|
|
|
|
// and the 'template' element has not been designed to be
|
|
|
|
|
// collaborative with the index element.
|
|
|
|
|
//
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
if action != "" { |
|
|
|
|
p.form.Attr = []Attribute{{Key: "action", Val: action}} |
|
|
|
|
} |
|
|
|
@ -952,11 +1045,16 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
} |
|
|
|
|
p.reconstructActiveFormattingElements() |
|
|
|
|
p.addElement() |
|
|
|
|
case a.Rp, a.Rt: |
|
|
|
|
case a.Rb, a.Rtc: |
|
|
|
|
if p.elementInScope(defaultScope, a.Ruby) { |
|
|
|
|
p.generateImpliedEndTags() |
|
|
|
|
} |
|
|
|
|
p.addElement() |
|
|
|
|
case a.Rp, a.Rt: |
|
|
|
|
if p.elementInScope(defaultScope, a.Ruby) { |
|
|
|
|
p.generateImpliedEndTags("rtc") |
|
|
|
|
} |
|
|
|
|
p.addElement() |
|
|
|
|
case a.Math, a.Svg: |
|
|
|
|
p.reconstructActiveFormattingElements() |
|
|
|
|
if p.tok.DataAtom == a.Math { |
|
|
|
@ -993,6 +1091,19 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: |
|
|
|
|
p.popUntil(defaultScope, p.tok.DataAtom) |
|
|
|
|
case a.Form: |
|
|
|
|
if p.oe.contains(a.Template) { |
|
|
|
|
i := p.indexOfElementInScope(defaultScope, a.Form) |
|
|
|
|
if i == -1 { |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
p.generateImpliedEndTags() |
|
|
|
|
if p.oe[i].DataAtom != a.Form { |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
p.popUntil(defaultScope, a.Form) |
|
|
|
|
} else { |
|
|
|
|
node := p.form |
|
|
|
|
p.form = nil |
|
|
|
|
i := p.indexOfElementInScope(defaultScope, a.Form) |
|
|
|
@ -1002,6 +1113,7 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
} |
|
|
|
|
p.generateImpliedEndTags() |
|
|
|
|
p.oe.remove(node) |
|
|
|
|
} |
|
|
|
|
case a.P: |
|
|
|
|
if !p.elementInScope(buttonScope, a.P) { |
|
|
|
|
p.parseImpliedToken(StartTagToken, a.P, a.P.String()) |
|
|
|
@ -1022,6 +1134,8 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
case a.Br: |
|
|
|
|
p.tok.Type = StartTagToken |
|
|
|
|
return false |
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
default: |
|
|
|
|
p.inBodyEndTagOther(p.tok.DataAtom) |
|
|
|
|
} |
|
|
|
@ -1030,6 +1144,21 @@ func inBodyIM(p *parser) bool { |
|
|
|
|
Type: CommentNode, |
|
|
|
|
Data: p.tok.Data, |
|
|
|
|
}) |
|
|
|
|
case ErrorToken: |
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
if len(p.templateStack) > 0 { |
|
|
|
|
p.im = inTemplateIM |
|
|
|
|
return false |
|
|
|
|
} else { |
|
|
|
|
for _, e := range p.oe { |
|
|
|
|
switch e.DataAtom { |
|
|
|
|
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th, |
|
|
|
|
a.Thead, a.Tr, a.Body, a.Html: |
|
|
|
|
default: |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return true |
|
|
|
@ -1160,7 +1289,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
|
|
|
|
|
// "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content
|
|
|
|
|
// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
|
|
|
|
|
// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
|
|
|
|
|
func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { |
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- { |
|
|
|
@ -1174,7 +1303,7 @@ func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.8.
|
|
|
|
|
// Section 12.2.6.4.8.
|
|
|
|
|
func textIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case ErrorToken: |
|
|
|
@ -1203,12 +1332,9 @@ func textIM(p *parser) bool { |
|
|
|
|
return p.tok.Type == EndTagToken |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.9.
|
|
|
|
|
// Section 12.2.6.4.9.
|
|
|
|
|
func inTableIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case ErrorToken: |
|
|
|
|
// Stop parsing.
|
|
|
|
|
return true |
|
|
|
|
case TextToken: |
|
|
|
|
p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1) |
|
|
|
|
switch p.oe.top().DataAtom { |
|
|
|
@ -1249,7 +1375,7 @@ func inTableIM(p *parser) bool { |
|
|
|
|
} |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case a.Style, a.Script: |
|
|
|
|
case a.Style, a.Script, a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
case a.Input: |
|
|
|
|
for _, t := range p.tok.Attr { |
|
|
|
@ -1261,7 +1387,7 @@ func inTableIM(p *parser) bool { |
|
|
|
|
} |
|
|
|
|
// Otherwise drop down to the default action.
|
|
|
|
|
case a.Form: |
|
|
|
|
if p.form != nil { |
|
|
|
|
if p.oe.contains(a.Template) || p.form != nil { |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
@ -1291,6 +1417,8 @@ func inTableIM(p *parser) bool { |
|
|
|
|
case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
} |
|
|
|
|
case CommentToken: |
|
|
|
|
p.addChild(&Node{ |
|
|
|
@ -1301,6 +1429,8 @@ func inTableIM(p *parser) bool { |
|
|
|
|
case DoctypeToken: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case ErrorToken: |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
p.fosterParenting = true |
|
|
|
@ -1309,7 +1439,7 @@ func inTableIM(p *parser) bool { |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.11.
|
|
|
|
|
// Section 12.2.6.4.11.
|
|
|
|
|
func inCaptionIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case StartTagToken: |
|
|
|
@ -1355,7 +1485,7 @@ func inCaptionIM(p *parser) bool { |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.12.
|
|
|
|
|
// Section 12.2.6.4.12.
|
|
|
|
|
func inColumnGroupIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -1386,11 +1516,13 @@ func inColumnGroupIM(p *parser) bool { |
|
|
|
|
p.oe.pop() |
|
|
|
|
p.acknowledgeSelfClosingTag() |
|
|
|
|
return true |
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
} |
|
|
|
|
case EndTagToken: |
|
|
|
|
switch p.tok.DataAtom { |
|
|
|
|
case a.Colgroup: |
|
|
|
|
if p.oe.top().DataAtom != a.Html { |
|
|
|
|
if p.oe.top().DataAtom == a.Colgroup { |
|
|
|
|
p.oe.pop() |
|
|
|
|
p.im = inTableIM |
|
|
|
|
} |
|
|
|
@ -1398,17 +1530,21 @@ func inColumnGroupIM(p *parser) bool { |
|
|
|
|
case a.Col: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
} |
|
|
|
|
case ErrorToken: |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
} |
|
|
|
|
if p.oe.top().DataAtom != a.Colgroup { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
if p.oe.top().DataAtom != a.Html { |
|
|
|
|
p.oe.pop() |
|
|
|
|
p.im = inTableIM |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.13.
|
|
|
|
|
// Section 12.2.6.4.13.
|
|
|
|
|
func inTableBodyIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case StartTagToken: |
|
|
|
@ -1460,7 +1596,7 @@ func inTableBodyIM(p *parser) bool { |
|
|
|
|
return inTableIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.14.
|
|
|
|
|
// Section 12.2.6.4.14.
|
|
|
|
|
func inRowIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case StartTagToken: |
|
|
|
@ -1511,7 +1647,7 @@ func inRowIM(p *parser) bool { |
|
|
|
|
return inTableIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.15.
|
|
|
|
|
// Section 12.2.6.4.15.
|
|
|
|
|
func inCellIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case StartTagToken: |
|
|
|
@ -1560,12 +1696,9 @@ func inCellIM(p *parser) bool { |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.16.
|
|
|
|
|
// Section 12.2.6.4.16.
|
|
|
|
|
func inSelectIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case ErrorToken: |
|
|
|
|
// Stop parsing.
|
|
|
|
|
return true |
|
|
|
|
case TextToken: |
|
|
|
|
p.addText(strings.Replace(p.tok.Data, "\x00", "", -1)) |
|
|
|
|
case StartTagToken: |
|
|
|
@ -1597,7 +1730,7 @@ func inSelectIM(p *parser) bool { |
|
|
|
|
p.tokenizer.NextIsNotRawText() |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case a.Script: |
|
|
|
|
case a.Script, a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
} |
|
|
|
|
case EndTagToken: |
|
|
|
@ -1618,6 +1751,8 @@ func inSelectIM(p *parser) bool { |
|
|
|
|
if p.popUntil(selectScope, a.Select) { |
|
|
|
|
p.resetInsertionMode() |
|
|
|
|
} |
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
} |
|
|
|
|
case CommentToken: |
|
|
|
|
p.addChild(&Node{ |
|
|
|
@ -1627,12 +1762,14 @@ func inSelectIM(p *parser) bool { |
|
|
|
|
case DoctypeToken: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
case ErrorToken: |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.17.
|
|
|
|
|
// Section 12.2.6.4.17.
|
|
|
|
|
func inSelectInTableIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case StartTagToken, EndTagToken: |
|
|
|
@ -1650,7 +1787,73 @@ func inSelectInTableIM(p *parser) bool { |
|
|
|
|
return inSelectIM(p) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.18.
|
|
|
|
|
// Section 12.2.6.4.18.
|
|
|
|
|
func inTemplateIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken, CommentToken, DoctypeToken: |
|
|
|
|
return inBodyIM(p) |
|
|
|
|
case StartTagToken: |
|
|
|
|
switch p.tok.DataAtom { |
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead: |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.templateStack = append(p.templateStack, inTableIM) |
|
|
|
|
p.im = inTableIM |
|
|
|
|
return false |
|
|
|
|
case a.Col: |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.templateStack = append(p.templateStack, inColumnGroupIM) |
|
|
|
|
p.im = inColumnGroupIM |
|
|
|
|
return false |
|
|
|
|
case a.Tr: |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.templateStack = append(p.templateStack, inTableBodyIM) |
|
|
|
|
p.im = inTableBodyIM |
|
|
|
|
return false |
|
|
|
|
case a.Td, a.Th: |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.templateStack = append(p.templateStack, inRowIM) |
|
|
|
|
p.im = inRowIM |
|
|
|
|
return false |
|
|
|
|
default: |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.templateStack = append(p.templateStack, inBodyIM) |
|
|
|
|
p.im = inBodyIM |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
case EndTagToken: |
|
|
|
|
switch p.tok.DataAtom { |
|
|
|
|
case a.Template: |
|
|
|
|
return inHeadIM(p) |
|
|
|
|
default: |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
case ErrorToken: |
|
|
|
|
if !p.oe.contains(a.Template) { |
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.generateImpliedEndTags() |
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- { |
|
|
|
|
if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template { |
|
|
|
|
p.oe = p.oe[:i] |
|
|
|
|
break |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
p.clearActiveFormattingElements() |
|
|
|
|
p.templateStack.pop() |
|
|
|
|
p.resetInsertionMode() |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.6.4.19.
|
|
|
|
|
func afterBodyIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case ErrorToken: |
|
|
|
@ -1688,7 +1891,7 @@ func afterBodyIM(p *parser) bool { |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.19.
|
|
|
|
|
// Section 12.2.6.4.20.
|
|
|
|
|
func inFramesetIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case CommentToken: |
|
|
|
@ -1738,7 +1941,7 @@ func inFramesetIM(p *parser) bool { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.20.
|
|
|
|
|
// Section 12.2.6.4.21.
|
|
|
|
|
func afterFramesetIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case CommentToken: |
|
|
|
@ -1777,7 +1980,7 @@ func afterFramesetIM(p *parser) bool { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.21.
|
|
|
|
|
// Section 12.2.6.4.22.
|
|
|
|
|
func afterAfterBodyIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case ErrorToken: |
|
|
|
@ -1806,7 +2009,7 @@ func afterAfterBodyIM(p *parser) bool { |
|
|
|
|
return false |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.22.
|
|
|
|
|
// Section 12.2.6.4.23.
|
|
|
|
|
func afterAfterFramesetIM(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case CommentToken: |
|
|
|
@ -1844,7 +2047,7 @@ func afterAfterFramesetIM(p *parser) bool { |
|
|
|
|
|
|
|
|
|
// whitespaceOrNUL is the whitespace set with the NUL byte ("\x00") appended.
const whitespaceOrNUL = whitespace + "\x00" |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.5.
|
|
|
|
|
// Section 12.2.6.5
|
|
|
|
|
func parseForeignContent(p *parser) bool { |
|
|
|
|
switch p.tok.Type { |
|
|
|
|
case TextToken: |
|
|
|
@ -1924,7 +2127,7 @@ func parseForeignContent(p *parser) bool { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Section 12.2.5.
|
|
|
|
|
// Section 12.2.6.
|
|
|
|
|
func (p *parser) inForeignContent() bool { |
|
|
|
|
if len(p.oe) == 0 { |
|
|
|
|
return false |
|
|
|
@ -2012,6 +2215,15 @@ func (p *parser) parse() error { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Parse returns the parse tree for the HTML from the given Reader.
|
|
|
|
|
//
|
|
|
|
|
// It implements the HTML5 parsing algorithm
|
|
|
|
|
// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
|
|
|
|
|
// which is very complicated. The resultant tree can contain implicitly created
|
|
|
|
|
// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
|
|
|
|
|
// differ from the nesting implied by a naive processing of start and end
|
|
|
|
|
// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
|
|
|
|
|
// with no corresponding node in the resulting tree.
|
|
|
|
|
//
|
|
|
|
|
// The input is assumed to be UTF-8 encoded.
|
|
|
|
|
func Parse(r io.Reader) (*Node, error) { |
|
|
|
|
p := &parser{ |
|
|
|
@ -2033,6 +2245,8 @@ func Parse(r io.Reader) (*Node, error) { |
|
|
|
|
// ParseFragment parses a fragment of HTML and returns the nodes that were
|
|
|
|
|
// found. If the fragment is the InnerHTML for an existing element, pass that
|
|
|
|
|
// element in context.
|
|
|
|
|
//
|
|
|
|
|
// It has the same intricacies as Parse.
|
|
|
|
|
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { |
|
|
|
|
contextTag := "" |
|
|
|
|
if context != nil { |
|
|
|
@ -2064,6 +2278,9 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { |
|
|
|
|
} |
|
|
|
|
p.doc.AppendChild(root) |
|
|
|
|
p.oe = nodeStack{root} |
|
|
|
|
if context != nil && context.DataAtom == a.Template { |
|
|
|
|
p.templateStack = append(p.templateStack, inTemplateIM) |
|
|
|
|
} |
|
|
|
|
p.resetInsertionMode() |
|
|
|
|
|
|
|
|
|
for n := context; n != nil; n = n.Parent { |
|
|
|
|