transition.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"strings"
    10  )
    11  
// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// Entries are keyed by state constant. Several states share one function:
// for example stateRCDATA is handled by tSpecialTagEnd, stateSrcset by tURL,
// all JS string and regexp states by tJSDelimited, and the JS HTML-like
// comment states by tLineCmt.
var transitionFunc = [...]func(context, []byte) (context, int){
	stateText:           tText,
	stateTag:            tTag,
	stateAttrName:       tAttrName,
	stateAfterName:      tAfterName,
	stateBeforeValue:    tBeforeValue,
	stateHTMLCmt:        tHTMLCmt,
	stateRCDATA:         tSpecialTagEnd,
	stateAttr:           tAttr,
	stateURL:            tURL,
	stateMetaContent:    tMetaContent,
	stateMetaContentURL: tMetaContentURL,
	stateSrcset:         tURL,
	stateJS:             tJS,
	stateJSDqStr:        tJSDelimited,
	stateJSSqStr:        tJSDelimited,
	stateJSRegexp:       tJSDelimited,
	stateJSTmplLit:      tJSTmpl,
	stateJSBlockCmt:     tBlockCmt,
	stateJSLineCmt:      tLineCmt,
	stateJSHTMLOpenCmt:  tLineCmt,
	stateJSHTMLCloseCmt: tLineCmt,
	stateCSS:            tCSS,
	stateCSSDqStr:       tCSSStr,
	stateCSSSqStr:       tCSSStr,
	stateCSSDqURL:       tCSSStr,
	stateCSSSqURL:       tCSSStr,
	stateCSSURL:         tCSSStr,
	stateCSSBlockCmt:    tBlockCmt,
	stateCSSLineCmt:     tLineCmt,
	stateError:          tError,
}
    48  
    49  var commentStart = []byte("<!--")
    50  var commentEnd = []byte("-->")
    51  
    52  // tText is the context transition function for the text state.
    53  func tText(c context, s []byte) (context, int) {
    54  	k := 0
    55  	for {
    56  		i := k + bytes.IndexByte(s[k:], '<')
    57  		if i < k || i+1 == len(s) {
    58  			return c, len(s)
    59  		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
    60  			return context{state: stateHTMLCmt}, i + 4
    61  		}
    62  		i++
    63  		end := false
    64  		if s[i] == '/' {
    65  			if i+1 == len(s) {
    66  				return c, len(s)
    67  			}
    68  			end, i = true, i+1
    69  		}
    70  		j, e := eatTagName(s, i)
    71  		if j != i {
    72  			if end {
    73  				e = elementNone
    74  			}
    75  			// We've found an HTML tag.
    76  			return context{state: stateTag, element: e}, j
    77  		}
    78  		k = j
    79  	}
    80  }
    81  
// elementContentType maps an element to the state that tTag enters when the
// element's tag is closed by '>'. tTag handles elementMeta itself before
// consulting this table, so the elementMeta entry only keeps the array
// indexable for every element value listed here.
var elementContentType = [...]state{
	elementNone:     stateText,
	elementScript:   stateJS,
	elementStyle:    stateCSS,
	elementTextarea: stateRCDATA,
	elementTitle:    stateRCDATA,
	elementMeta:     stateText,
}
    90  
    91  // tTag is the context transition function for the tag state.
    92  func tTag(c context, s []byte) (context, int) {
    93  	// Find the attribute name.
    94  	i := eatWhiteSpace(s, 0)
    95  	if i == len(s) {
    96  		return c, len(s)
    97  	}
    98  	if s[i] == '>' {
    99  		// Treat <meta> specially, because it doesn't have an end tag, and we
   100  		// want to transition into the correct state/element for it.
   101  		if c.element == elementMeta {
   102  			return context{state: stateText, element: elementNone}, i + 1
   103  		}
   104  		return context{
   105  			state:   elementContentType[c.element],
   106  			element: c.element,
   107  		}, i + 1
   108  	}
   109  	j, err := eatAttrName(s, i)
   110  	if err != nil {
   111  		return context{state: stateError, err: err}, len(s)
   112  	}
   113  	state, attr := stateTag, attrNone
   114  	if i == j {
   115  		return context{
   116  			state: stateError,
   117  			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
   118  		}, len(s)
   119  	}
   120  
   121  	attrName := strings.ToLower(string(s[i:j]))
   122  	if c.element == elementScript && attrName == "type" {
   123  		attr = attrScriptType
   124  	} else if c.element == elementMeta && attrName == "content" {
   125  		attr = attrMetaContent
   126  	} else {
   127  		switch attrType(attrName) {
   128  		case contentTypeURL:
   129  			attr = attrURL
   130  		case contentTypeCSS:
   131  			attr = attrStyle
   132  		case contentTypeJS:
   133  			attr = attrScript
   134  		case contentTypeSrcset:
   135  			attr = attrSrcset
   136  		}
   137  	}
   138  
   139  	if j == len(s) {
   140  		state = stateAttrName
   141  	} else {
   142  		state = stateAfterName
   143  	}
   144  	return context{state: state, element: c.element, attr: attr}, j
   145  }
   146  
   147  // tAttrName is the context transition function for stateAttrName.
   148  func tAttrName(c context, s []byte) (context, int) {
   149  	i, err := eatAttrName(s, 0)
   150  	if err != nil {
   151  		return context{state: stateError, err: err}, len(s)
   152  	} else if i != len(s) {
   153  		c.state = stateAfterName
   154  	}
   155  	return c, i
   156  }
   157  
   158  // tAfterName is the context transition function for stateAfterName.
   159  func tAfterName(c context, s []byte) (context, int) {
   160  	// Look for the start of the value.
   161  	i := eatWhiteSpace(s, 0)
   162  	if i == len(s) {
   163  		return c, len(s)
   164  	} else if s[i] != '=' {
   165  		// Occurs due to tag ending '>', and valueless attribute.
   166  		c.state = stateTag
   167  		return c, i
   168  	}
   169  	c.state = stateBeforeValue
   170  	// Consume the "=".
   171  	return c, i + 1
   172  }
   173  
// attrStartStates maps an attribute kind to the state that tBeforeValue
// enters at the start of that attribute's value.
var attrStartStates = [...]state{
	attrNone:        stateAttr,
	attrScript:      stateJS,
	attrScriptType:  stateAttr,
	attrStyle:       stateCSS,
	attrURL:         stateURL,
	attrSrcset:      stateSrcset,
	attrMetaContent: stateMetaContent,
}
   183  
   184  // tBeforeValue is the context transition function for stateBeforeValue.
   185  func tBeforeValue(c context, s []byte) (context, int) {
   186  	i := eatWhiteSpace(s, 0)
   187  	if i == len(s) {
   188  		return c, len(s)
   189  	}
   190  	// Find the attribute delimiter.
   191  	delim := delimSpaceOrTagEnd
   192  	switch s[i] {
   193  	case '\'':
   194  		delim, i = delimSingleQuote, i+1
   195  	case '"':
   196  		delim, i = delimDoubleQuote, i+1
   197  	}
   198  	c.state, c.delim = attrStartStates[c.attr], delim
   199  	return c, i
   200  }
   201  
   202  // tHTMLCmt is the context transition function for stateHTMLCmt.
   203  func tHTMLCmt(c context, s []byte) (context, int) {
   204  	if i := bytes.Index(s, commentEnd); i != -1 {
   205  		return context{}, i + 3
   206  	}
   207  	return c, len(s)
   208  }
   209  
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
// tSpecialTagEnd passes the entry for the current element to indexTagEnd,
// which looks for it immediately after "</". The elementMeta entry is empty.
var specialTagEndMarkers = [...][]byte{
	elementScript:   []byte("script"),
	elementStyle:    []byte("style"),
	elementTextarea: []byte("textarea"),
	elementTitle:    []byte("title"),
	elementMeta:     []byte(""),
}
   219  
// Byte sets used by indexTagEnd to recognize an end tag:
// specialTagEndPrefix is the "</" that must precede the tag name, and
// tagEndSeparators lists the bytes ('>', space, tab, newline, form feed, '/')
// one of which must directly follow the tag name.
var (
	specialTagEndPrefix = []byte("</")
	tagEndSeparators    = []byte("> \t\n\f/")
)
   224  
   225  // tSpecialTagEnd is the context transition function for raw text and RCDATA
   226  // element states.
   227  func tSpecialTagEnd(c context, s []byte) (context, int) {
   228  	if c.element != elementNone {
   229  		// script end tags ("</script") within script literals are ignored, so that
   230  		// we can properly escape them.
   231  		if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
   232  			return c, len(s)
   233  		}
   234  		if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
   235  			return context{}, i
   236  		}
   237  	}
   238  	return c, len(s)
   239  }
   240  
   241  // indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
   242  func indexTagEnd(s []byte, tag []byte) int {
   243  	res := 0
   244  	plen := len(specialTagEndPrefix)
   245  	for len(s) > 0 {
   246  		// Try to find the tag end prefix first
   247  		i := bytes.Index(s, specialTagEndPrefix)
   248  		if i == -1 {
   249  			return i
   250  		}
   251  		s = s[i+plen:]
   252  		// Try to match the actual tag if there is still space for it
   253  		if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
   254  			s = s[len(tag):]
   255  			// Check the tag is followed by a proper separator
   256  			if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
   257  				return res + i
   258  			}
   259  			res += len(tag)
   260  		}
   261  		res += i + plen
   262  	}
   263  	return -1
   264  }
   265  
// tAttr is the context transition function for the attribute state.
// It consumes all of s and returns the context unchanged.
func tAttr(c context, s []byte) (context, int) {
	return c, len(s)
}
   270  
   271  // tURL is the context transition function for the URL state.
   272  func tURL(c context, s []byte) (context, int) {
   273  	if bytes.ContainsAny(s, "#?") {
   274  		c.urlPart = urlPartQueryOrFrag
   275  	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
   276  		// HTML5 uses "Valid URL potentially surrounded by spaces" for
   277  		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
   278  		c.urlPart = urlPartPreQuery
   279  	}
   280  	return c, len(s)
   281  }
   282  
// tJS is the context transition function for the JS state.
//
// It finds the first byte in s that may start a string, template literal,
// comment, regexp, or brace. The text before that byte is fed to nextJSCtx
// to update c.jsCtx, and the byte itself selects the next state. Unless a
// case returns early, the function consumes through index i, which some
// cases advance past the remaining bytes of a multi-byte opener.
func tJS(c context, s []byte) (context, int) {
	i := bytes.IndexAny(s, "\"`'/{}<-#")
	if i == -1 {
		// Entire input is non string, comment, regexp tokens.
		c.jsCtx = nextJSCtx(s, c.jsCtx)
		return c, len(s)
	}
	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
	switch s[i] {
	case '"':
		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
	case '\'':
		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
	case '`':
		c.state, c.jsCtx = stateJSTmplLit, jsCtxRegexp
	case '/':
		// Comments are recognized first; only then does jsCtx decide
		// between a regexp literal and a division operator.
		switch {
		case i+1 < len(s) && s[i+1] == '/':
			c.state, i = stateJSLineCmt, i+1
		case i+1 < len(s) && s[i+1] == '*':
			c.state, i = stateJSBlockCmt, i+1
		case c.jsCtx == jsCtxRegexp:
			c.state = stateJSRegexp
		case c.jsCtx == jsCtxDivOp:
			c.jsCtx = jsCtxRegexp
		default:
			return context{
				state: stateError,
				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
			}, len(s)
		}
	// ECMAScript supports HTML style comments for legacy reasons, see Appendix
	// B.1.1 "HTML-like Comments". The handling of these comments is somewhat
	// confusing. Multi-line comments are not supported, i.e. anything on lines
	// between the opening and closing tokens is not considered a comment, but
	// anything following the opening or closing token, on the same line, is
	// ignored. As such we simply treat any line prefixed with "<!--" or "-->"
	// as if it were actually prefixed with "//" and move on.
	case '<':
		// A '<' that does not begin "<!--" is simply consumed.
		if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
			c.state, i = stateJSHTMLOpenCmt, i+3
		}
	case '-':
		// A '-' that does not begin "-->" is simply consumed.
		if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
			c.state, i = stateJSHTMLCloseCmt, i+2
		}
	// ECMAScript also supports "hashbang" comment lines, see Section 12.5.
	case '#':
		if i+1 < len(s) && s[i+1] == '!' {
			c.state, i = stateJSLineCmt, i+1
		}
	case '{':
		// We only care about tracking brace depth if we are inside of a
		// template literal.
		if len(c.jsBraceDepth) == 0 {
			return c, i + 1
		}
		c.jsBraceDepth[len(c.jsBraceDepth)-1]++
	case '}':
		if len(c.jsBraceDepth) == 0 {
			return c, i + 1
		}
		// There are no cases where a brace can be escaped in the JS context
		// that are not syntax errors, it seems. Because of this we can just
		// count "\}" as "}" and move on, the script is already broken as
		// fully fledged parsers will just fail anyway.
		c.jsBraceDepth[len(c.jsBraceDepth)-1]--
		if c.jsBraceDepth[len(c.jsBraceDepth)-1] >= 0 {
			return c, i + 1
		}
		// The counter went negative, so this '}' matches the "${" for which
		// tJSTmpl pushed the counter: pop it and resume the template literal.
		c.jsBraceDepth = c.jsBraceDepth[:len(c.jsBraceDepth)-1]
		c.state = stateJSTmplLit
	default:
		panic("unreachable")
	}
	return c, i + 1
}
   361  
   362  func tJSTmpl(c context, s []byte) (context, int) {
   363  	var k int
   364  	for {
   365  		i := k + bytes.IndexAny(s[k:], "`\\$")
   366  		if i < k {
   367  			break
   368  		}
   369  		switch s[i] {
   370  		case '\\':
   371  			i++
   372  			if i == len(s) {
   373  				return context{
   374  					state: stateError,
   375  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   376  				}, len(s)
   377  			}
   378  		case '$':
   379  			if len(s) >= i+2 && s[i+1] == '{' {
   380  				c.jsBraceDepth = append(c.jsBraceDepth, 0)
   381  				c.state = stateJS
   382  				return c, i + 2
   383  			}
   384  		case '`':
   385  			// end
   386  			c.state = stateJS
   387  			return c, i + 1
   388  		}
   389  		k = i + 1
   390  	}
   391  
   392  	return c, len(s)
   393  }
   394  
   395  // tJSDelimited is the context transition function for the JS string and regexp
   396  // states.
   397  func tJSDelimited(c context, s []byte) (context, int) {
   398  	specials := `\"`
   399  	switch c.state {
   400  	case stateJSSqStr:
   401  		specials = `\'`
   402  	case stateJSRegexp:
   403  		specials = `\/[]`
   404  	}
   405  
   406  	k, inCharset := 0, false
   407  	for {
   408  		i := k + bytes.IndexAny(s[k:], specials)
   409  		if i < k {
   410  			break
   411  		}
   412  		switch s[i] {
   413  		case '\\':
   414  			i++
   415  			if i == len(s) {
   416  				return context{
   417  					state: stateError,
   418  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
   419  				}, len(s)
   420  			}
   421  		case '[':
   422  			inCharset = true
   423  		case ']':
   424  			inCharset = false
   425  		case '/':
   426  			// If "</script" appears in a regex literal, the '/' should not
   427  			// close the regex literal, and it will later be escaped to
   428  			// "\x3C/script" in escapeText.
   429  			if i > 0 && i+7 <= len(s) && bytes.Equal(bytes.ToLower(s[i-1:i+7]), []byte("</script")) {
   430  				i++
   431  			} else if !inCharset {
   432  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   433  				return c, i + 1
   434  			}
   435  		default:
   436  			// end delimiter
   437  			if !inCharset {
   438  				c.state, c.jsCtx = stateJS, jsCtxDivOp
   439  				return c, i + 1
   440  			}
   441  		}
   442  		k = i + 1
   443  	}
   444  
   445  	if inCharset {
   446  		// This can be fixed by making context richer if interpolation
   447  		// into charsets is desired.
   448  		return context{
   449  			state: stateError,
   450  			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
   451  		}, len(s)
   452  	}
   453  
   454  	return c, len(s)
   455  }
   456  
   457  var blockCommentEnd = []byte("*/")
   458  
   459  // tBlockCmt is the context transition function for /*comment*/ states.
   460  func tBlockCmt(c context, s []byte) (context, int) {
   461  	i := bytes.Index(s, blockCommentEnd)
   462  	if i == -1 {
   463  		return c, len(s)
   464  	}
   465  	switch c.state {
   466  	case stateJSBlockCmt:
   467  		c.state = stateJS
   468  	case stateCSSBlockCmt:
   469  		c.state = stateCSS
   470  	default:
   471  		panic(c.state.String())
   472  	}
   473  	return c, i + 2
   474  }
   475  
   476  // tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state.
   477  func tLineCmt(c context, s []byte) (context, int) {
   478  	var lineTerminators string
   479  	var endState state
   480  	switch c.state {
   481  	case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
   482  		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
   483  	case stateCSSLineCmt:
   484  		lineTerminators, endState = "\n\f\r", stateCSS
   485  		// Line comments are not part of any published CSS standard but
   486  		// are supported by the 4 major browsers.
   487  		// This defines line comments as
   488  		//     LINECOMMENT ::= "//" [^\n\f\d]*
   489  		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
   490  		// newlines:
   491  		//     nl ::= #xA | #xD #xA | #xD | #xC
   492  	default:
   493  		panic(c.state.String())
   494  	}
   495  
   496  	i := bytes.IndexAny(s, lineTerminators)
   497  	if i == -1 {
   498  		return c, len(s)
   499  	}
   500  	c.state = endState
   501  	// Per section 7.4 of EcmaScript 5 : https://es5.github.io/#x7.4
   502  	// "However, the LineTerminator at the end of the line is not
   503  	// considered to be part of the single-line comment; it is
   504  	// recognized separately by the lexical grammar and becomes part
   505  	// of the stream of input elements for the syntactic grammar."
   506  	return c, i
   507  }
   508  
   509  // tCSS is the context transition function for the CSS state.
   510  func tCSS(c context, s []byte) (context, int) {
   511  	// CSS quoted strings are almost never used except for:
   512  	// (1) URLs as in background: "/foo.png"
   513  	// (2) Multiword font-names as in font-family: "Times New Roman"
   514  	// (3) List separators in content values as in inline-lists:
   515  	//    <style>
   516  	//    ul.inlineList { list-style: none; padding:0 }
   517  	//    ul.inlineList > li { display: inline }
   518  	//    ul.inlineList > li:before { content: ", " }
   519  	//    ul.inlineList > li:first-child:before { content: "" }
   520  	//    </style>
   521  	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
   522  	// (4) Attribute value selectors as in a[href="http://example.com/"]
   523  	//
   524  	// We conservatively treat all strings as URLs, but make some
   525  	// allowances to avoid confusion.
   526  	//
   527  	// In (1), our conservative assumption is justified.
   528  	// In (2), valid font names do not contain ':', '?', or '#', so our
   529  	// conservative assumption is fine since we will never transition past
   530  	// urlPartPreQuery.
   531  	// In (3), our protocol heuristic should not be tripped, and there
   532  	// should not be non-space content after a '?' or '#', so as long as
   533  	// we only %-encode RFC 3986 reserved characters we are ok.
   534  	// In (4), we should URL escape for URL attributes, and for others we
   535  	// have the attribute name available if our conservative assumption
   536  	// proves problematic for real code.
   537  
   538  	k := 0
   539  	for {
   540  		i := k + bytes.IndexAny(s[k:], `("'/`)
   541  		if i < k {
   542  			return c, len(s)
   543  		}
   544  		switch s[i] {
   545  		case '(':
   546  			// Look for url to the left.
   547  			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
   548  			if endsWithCSSKeyword(p, "url") {
   549  				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
   550  				switch {
   551  				case j != len(s) && s[j] == '"':
   552  					c.state, j = stateCSSDqURL, j+1
   553  				case j != len(s) && s[j] == '\'':
   554  					c.state, j = stateCSSSqURL, j+1
   555  				default:
   556  					c.state = stateCSSURL
   557  				}
   558  				return c, j
   559  			}
   560  		case '/':
   561  			if i+1 < len(s) {
   562  				switch s[i+1] {
   563  				case '/':
   564  					c.state = stateCSSLineCmt
   565  					return c, i + 2
   566  				case '*':
   567  					c.state = stateCSSBlockCmt
   568  					return c, i + 2
   569  				}
   570  			}
   571  		case '"':
   572  			c.state = stateCSSDqStr
   573  			return c, i + 1
   574  		case '\'':
   575  			c.state = stateCSSSqStr
   576  			return c, i + 1
   577  		}
   578  		k = i + 1
   579  	}
   580  }
   581  
   582  // tCSSStr is the context transition function for the CSS string and URL states.
   583  func tCSSStr(c context, s []byte) (context, int) {
   584  	var endAndEsc string
   585  	switch c.state {
   586  	case stateCSSDqStr, stateCSSDqURL:
   587  		endAndEsc = `\"`
   588  	case stateCSSSqStr, stateCSSSqURL:
   589  		endAndEsc = `\'`
   590  	case stateCSSURL:
   591  		// Unquoted URLs end with a newline or close parenthesis.
   592  		// The below includes the wc (whitespace character) and nl.
   593  		endAndEsc = "\\\t\n\f\r )"
   594  	default:
   595  		panic(c.state.String())
   596  	}
   597  
   598  	k := 0
   599  	for {
   600  		i := k + bytes.IndexAny(s[k:], endAndEsc)
   601  		if i < k {
   602  			c, nread := tURL(c, decodeCSS(s[k:]))
   603  			return c, k + nread
   604  		}
   605  		if s[i] == '\\' {
   606  			i++
   607  			if i == len(s) {
   608  				return context{
   609  					state: stateError,
   610  					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
   611  				}, len(s)
   612  			}
   613  		} else {
   614  			c.state = stateCSS
   615  			return c, i + 1
   616  		}
   617  		c, _ = tURL(c, decodeCSS(s[:i+1]))
   618  		k = i + 1
   619  	}
   620  }
   621  
// tError is the context transition function for the error state.
// It consumes all of s and returns the context unchanged.
func tError(c context, s []byte) (context, int) {
	return c, len(s)
}
   626  
   627  // tMetaContent is the context transition function for the meta content attribute state.
   628  func tMetaContent(c context, s []byte) (context, int) {
   629  	for i := 0; i < len(s); i++ {
   630  		if i+3 <= len(s)-1 && bytes.Equal(bytes.ToLower(s[i:i+4]), []byte("url=")) {
   631  			c.state = stateMetaContentURL
   632  			return c, i + 4
   633  		}
   634  	}
   635  	return c, len(s)
   636  }
   637  
   638  // tMetaContentURL is the context transition function for the "url=" part of a meta content attribute state.
   639  func tMetaContentURL(c context, s []byte) (context, int) {
   640  	for i := 0; i < len(s); i++ {
   641  		if s[i] == ';' {
   642  			c.state = stateMetaContent
   643  			return c, i + 1
   644  		}
   645  	}
   646  	return c, len(s)
   647  }
   648  
   649  // eatAttrName returns the largest j such that s[i:j] is an attribute name.
   650  // It returns an error if s[i:] does not look like it begins with an
   651  // attribute name, such as encountering a quote mark without a preceding
   652  // equals sign.
   653  func eatAttrName(s []byte, i int) (int, *Error) {
   654  	for j := i; j < len(s); j++ {
   655  		switch s[j] {
   656  		case ' ', '\t', '\n', '\f', '\r', '=', '>':
   657  			return j, nil
   658  		case '\'', '"', '<':
   659  			// These result in a parse warning in HTML5 and are
   660  			// indicative of serious problems if seen in an attr
   661  			// name in a template.
   662  			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
   663  		default:
   664  			// No-op.
   665  		}
   666  	}
   667  	return len(s), nil
   668  }
   669  
// elementNameMap maps lower-case tag names to the element values that get
// special treatment. eatTagName looks up the lower-cased tag name here; names
// that are absent yield the map's zero value.
var elementNameMap = map[string]element{
	"script":   elementScript,
	"style":    elementStyle,
	"textarea": elementTextarea,
	"title":    elementTitle,
	"meta":     elementMeta,
}
   677  
   678  // asciiAlpha reports whether c is an ASCII letter.
   679  func asciiAlpha(c byte) bool {
   680  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   681  }
   682  
   683  // asciiAlphaNum reports whether c is an ASCII letter or digit.
   684  func asciiAlphaNum(c byte) bool {
   685  	return asciiAlpha(c) || '0' <= c && c <= '9'
   686  }
   687  
   688  // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
   689  func eatTagName(s []byte, i int) (int, element) {
   690  	if i == len(s) || !asciiAlpha(s[i]) {
   691  		return i, elementNone
   692  	}
   693  	j := i + 1
   694  	for j < len(s) {
   695  		x := s[j]
   696  		if asciiAlphaNum(x) {
   697  			j++
   698  			continue
   699  		}
   700  		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
   701  		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
   702  			j += 2
   703  			continue
   704  		}
   705  		break
   706  	}
   707  	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
   708  }
   709  
   710  // eatWhiteSpace returns the largest j such that s[i:j] is white space.
   711  func eatWhiteSpace(s []byte, i int) int {
   712  	for j := i; j < len(s); j++ {
   713  		switch s[j] {
   714  		case ' ', '\t', '\n', '\f', '\r':
   715  			// No-op.
   716  		default:
   717  			return j
   718  		}
   719  	}
   720  	return len(s)
   721  }
   722
View as plain text