1
2
3
4
5 package template
6
7 import (
8 "bytes"
9 "strings"
10 )
11
12
13
14
15
16 var transitionFunc = [...]func(context, []byte) (context, int){
17 stateText: tText,
18 stateTag: tTag,
19 stateAttrName: tAttrName,
20 stateAfterName: tAfterName,
21 stateBeforeValue: tBeforeValue,
22 stateHTMLCmt: tHTMLCmt,
23 stateRCDATA: tSpecialTagEnd,
24 stateAttr: tAttr,
25 stateURL: tURL,
26 stateMetaContent: tMetaContent,
27 stateMetaContentURL: tMetaContentURL,
28 stateSrcset: tURL,
29 stateJS: tJS,
30 stateJSDqStr: tJSDelimited,
31 stateJSSqStr: tJSDelimited,
32 stateJSRegexp: tJSDelimited,
33 stateJSTmplLit: tJSTmpl,
34 stateJSBlockCmt: tBlockCmt,
35 stateJSLineCmt: tLineCmt,
36 stateJSHTMLOpenCmt: tLineCmt,
37 stateJSHTMLCloseCmt: tLineCmt,
38 stateCSS: tCSS,
39 stateCSSDqStr: tCSSStr,
40 stateCSSSqStr: tCSSStr,
41 stateCSSDqURL: tCSSStr,
42 stateCSSSqURL: tCSSStr,
43 stateCSSURL: tCSSStr,
44 stateCSSBlockCmt: tBlockCmt,
45 stateCSSLineCmt: tLineCmt,
46 stateError: tError,
47 }
48
// Delimiters of an HTML comment.
var (
	commentStart = []byte("<!--")
	commentEnd   = []byte("-->")
)
51
52
53 func tText(c context, s []byte) (context, int) {
54 k := 0
55 for {
56 i := k + bytes.IndexByte(s[k:], '<')
57 if i < k || i+1 == len(s) {
58 return c, len(s)
59 } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
60 return context{state: stateHTMLCmt}, i + 4
61 }
62 i++
63 end := false
64 if s[i] == '/' {
65 if i+1 == len(s) {
66 return c, len(s)
67 }
68 end, i = true, i+1
69 }
70 j, e := eatTagName(s, i)
71 if j != i {
72 if end {
73 e = elementNone
74 }
75
76 return context{state: stateTag, element: e}, j
77 }
78 k = j
79 }
80 }
81
82 var elementContentType = [...]state{
83 elementNone: stateText,
84 elementScript: stateJS,
85 elementStyle: stateCSS,
86 elementTextarea: stateRCDATA,
87 elementTitle: stateRCDATA,
88 elementMeta: stateText,
89 }
90
91
92 func tTag(c context, s []byte) (context, int) {
93
94 i := eatWhiteSpace(s, 0)
95 if i == len(s) {
96 return c, len(s)
97 }
98 if s[i] == '>' {
99
100
101 if c.element == elementMeta {
102 return context{state: stateText, element: elementNone}, i + 1
103 }
104 return context{
105 state: elementContentType[c.element],
106 element: c.element,
107 }, i + 1
108 }
109 j, err := eatAttrName(s, i)
110 if err != nil {
111 return context{state: stateError, err: err}, len(s)
112 }
113 state, attr := stateTag, attrNone
114 if i == j {
115 return context{
116 state: stateError,
117 err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
118 }, len(s)
119 }
120
121 attrName := strings.ToLower(string(s[i:j]))
122 if c.element == elementScript && attrName == "type" {
123 attr = attrScriptType
124 } else if c.element == elementMeta && attrName == "content" {
125 attr = attrMetaContent
126 } else {
127 switch attrType(attrName) {
128 case contentTypeURL:
129 attr = attrURL
130 case contentTypeCSS:
131 attr = attrStyle
132 case contentTypeJS:
133 attr = attrScript
134 case contentTypeSrcset:
135 attr = attrSrcset
136 }
137 }
138
139 if j == len(s) {
140 state = stateAttrName
141 } else {
142 state = stateAfterName
143 }
144 return context{state: state, element: c.element, attr: attr}, j
145 }
146
147
148 func tAttrName(c context, s []byte) (context, int) {
149 i, err := eatAttrName(s, 0)
150 if err != nil {
151 return context{state: stateError, err: err}, len(s)
152 } else if i != len(s) {
153 c.state = stateAfterName
154 }
155 return c, i
156 }
157
158
159 func tAfterName(c context, s []byte) (context, int) {
160
161 i := eatWhiteSpace(s, 0)
162 if i == len(s) {
163 return c, len(s)
164 } else if s[i] != '=' {
165
166 c.state = stateTag
167 return c, i
168 }
169 c.state = stateBeforeValue
170
171 return c, i + 1
172 }
173
174 var attrStartStates = [...]state{
175 attrNone: stateAttr,
176 attrScript: stateJS,
177 attrScriptType: stateAttr,
178 attrStyle: stateCSS,
179 attrURL: stateURL,
180 attrSrcset: stateSrcset,
181 attrMetaContent: stateMetaContent,
182 }
183
184
185 func tBeforeValue(c context, s []byte) (context, int) {
186 i := eatWhiteSpace(s, 0)
187 if i == len(s) {
188 return c, len(s)
189 }
190
191 delim := delimSpaceOrTagEnd
192 switch s[i] {
193 case '\'':
194 delim, i = delimSingleQuote, i+1
195 case '"':
196 delim, i = delimDoubleQuote, i+1
197 }
198 c.state, c.delim = attrStartStates[c.attr], delim
199 return c, i
200 }
201
202
203 func tHTMLCmt(c context, s []byte) (context, int) {
204 if i := bytes.Index(s, commentEnd); i != -1 {
205 return context{}, i + 3
206 }
207 return c, len(s)
208 }
209
210
211
212 var specialTagEndMarkers = [...][]byte{
213 elementScript: []byte("script"),
214 elementStyle: []byte("style"),
215 elementTextarea: []byte("textarea"),
216 elementTitle: []byte("title"),
217 elementMeta: []byte(""),
218 }
219
// specialTagEndPrefix is the byte sequence that opens an end tag.
var specialTagEndPrefix = []byte("</")

// tagEndSeparators lists the bytes that may directly follow the tag name of
// an end tag for indexTagEnd to accept it.
var tagEndSeparators = []byte("> \t\n\f/")
224
225
226
227 func tSpecialTagEnd(c context, s []byte) (context, int) {
228 if c.element != elementNone {
229
230
231 if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
232 return c, len(s)
233 }
234 if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
235 return context{}, i
236 }
237 }
238 return c, len(s)
239 }
240
241
242 func indexTagEnd(s []byte, tag []byte) int {
243 res := 0
244 plen := len(specialTagEndPrefix)
245 for len(s) > 0 {
246
247 i := bytes.Index(s, specialTagEndPrefix)
248 if i == -1 {
249 return i
250 }
251 s = s[i+plen:]
252
253 if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
254 s = s[len(tag):]
255
256 if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
257 return res + i
258 }
259 res += len(tag)
260 }
261 res += i + plen
262 }
263 return -1
264 }
265
266
267 func tAttr(c context, s []byte) (context, int) {
268 return c, len(s)
269 }
270
271
272 func tURL(c context, s []byte) (context, int) {
273 if bytes.ContainsAny(s, "#?") {
274 c.urlPart = urlPartQueryOrFrag
275 } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
276
277
278 c.urlPart = urlPartPreQuery
279 }
280 return c, len(s)
281 }
282
283
284 func tJS(c context, s []byte) (context, int) {
285 i := bytes.IndexAny(s, "\"`'/{}<-#")
286 if i == -1 {
287
288 c.jsCtx = nextJSCtx(s, c.jsCtx)
289 return c, len(s)
290 }
291 c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
292 switch s[i] {
293 case '"':
294 c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
295 case '\'':
296 c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
297 case '`':
298 c.state, c.jsCtx = stateJSTmplLit, jsCtxRegexp
299 case '/':
300 switch {
301 case i+1 < len(s) && s[i+1] == '/':
302 c.state, i = stateJSLineCmt, i+1
303 case i+1 < len(s) && s[i+1] == '*':
304 c.state, i = stateJSBlockCmt, i+1
305 case c.jsCtx == jsCtxRegexp:
306 c.state = stateJSRegexp
307 case c.jsCtx == jsCtxDivOp:
308 c.jsCtx = jsCtxRegexp
309 default:
310 return context{
311 state: stateError,
312 err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
313 }, len(s)
314 }
315
316
317
318
319
320
321
322 case '<':
323 if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
324 c.state, i = stateJSHTMLOpenCmt, i+3
325 }
326 case '-':
327 if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
328 c.state, i = stateJSHTMLCloseCmt, i+2
329 }
330
331 case '#':
332 if i+1 < len(s) && s[i+1] == '!' {
333 c.state, i = stateJSLineCmt, i+1
334 }
335 case '{':
336
337
338 if len(c.jsBraceDepth) == 0 {
339 return c, i + 1
340 }
341 c.jsBraceDepth[len(c.jsBraceDepth)-1]++
342 case '}':
343 if len(c.jsBraceDepth) == 0 {
344 return c, i + 1
345 }
346
347
348
349
350 c.jsBraceDepth[len(c.jsBraceDepth)-1]--
351 if c.jsBraceDepth[len(c.jsBraceDepth)-1] >= 0 {
352 return c, i + 1
353 }
354 c.jsBraceDepth = c.jsBraceDepth[:len(c.jsBraceDepth)-1]
355 c.state = stateJSTmplLit
356 default:
357 panic("unreachable")
358 }
359 return c, i + 1
360 }
361
362 func tJSTmpl(c context, s []byte) (context, int) {
363 var k int
364 for {
365 i := k + bytes.IndexAny(s[k:], "`\\$")
366 if i < k {
367 break
368 }
369 switch s[i] {
370 case '\\':
371 i++
372 if i == len(s) {
373 return context{
374 state: stateError,
375 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
376 }, len(s)
377 }
378 case '$':
379 if len(s) >= i+2 && s[i+1] == '{' {
380 c.jsBraceDepth = append(c.jsBraceDepth, 0)
381 c.state = stateJS
382 return c, i + 2
383 }
384 case '`':
385
386 c.state = stateJS
387 return c, i + 1
388 }
389 k = i + 1
390 }
391
392 return c, len(s)
393 }
394
395
396
397 func tJSDelimited(c context, s []byte) (context, int) {
398 specials := `\"`
399 switch c.state {
400 case stateJSSqStr:
401 specials = `\'`
402 case stateJSRegexp:
403 specials = `\/[]`
404 }
405
406 k, inCharset := 0, false
407 for {
408 i := k + bytes.IndexAny(s[k:], specials)
409 if i < k {
410 break
411 }
412 switch s[i] {
413 case '\\':
414 i++
415 if i == len(s) {
416 return context{
417 state: stateError,
418 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
419 }, len(s)
420 }
421 case '[':
422 inCharset = true
423 case ']':
424 inCharset = false
425 case '/':
426
427
428
429 if i > 0 && i+7 <= len(s) && bytes.Equal(bytes.ToLower(s[i-1:i+7]), []byte("</script")) {
430 i++
431 } else if !inCharset {
432 c.state, c.jsCtx = stateJS, jsCtxDivOp
433 return c, i + 1
434 }
435 default:
436
437 if !inCharset {
438 c.state, c.jsCtx = stateJS, jsCtxDivOp
439 return c, i + 1
440 }
441 }
442 k = i + 1
443 }
444
445 if inCharset {
446
447
448 return context{
449 state: stateError,
450 err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
451 }, len(s)
452 }
453
454 return c, len(s)
455 }
456
// blockCommentEnd is the terminator of a /* ... */ comment.
var blockCommentEnd = []byte{'*', '/'}
458
459
460 func tBlockCmt(c context, s []byte) (context, int) {
461 i := bytes.Index(s, blockCommentEnd)
462 if i == -1 {
463 return c, len(s)
464 }
465 switch c.state {
466 case stateJSBlockCmt:
467 c.state = stateJS
468 case stateCSSBlockCmt:
469 c.state = stateCSS
470 default:
471 panic(c.state.String())
472 }
473 return c, i + 2
474 }
475
476
477 func tLineCmt(c context, s []byte) (context, int) {
478 var lineTerminators string
479 var endState state
480 switch c.state {
481 case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
482 lineTerminators, endState = "\n\r\u2028\u2029", stateJS
483 case stateCSSLineCmt:
484 lineTerminators, endState = "\n\f\r", stateCSS
485
486
487
488
489
490
491
492 default:
493 panic(c.state.String())
494 }
495
496 i := bytes.IndexAny(s, lineTerminators)
497 if i == -1 {
498 return c, len(s)
499 }
500 c.state = endState
501
502
503
504
505
506 return c, i
507 }
508
509
510 func tCSS(c context, s []byte) (context, int) {
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538 k := 0
539 for {
540 i := k + bytes.IndexAny(s[k:], `("'/`)
541 if i < k {
542 return c, len(s)
543 }
544 switch s[i] {
545 case '(':
546
547 p := bytes.TrimRight(s[:i], "\t\n\f\r ")
548 if endsWithCSSKeyword(p, "url") {
549 j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
550 switch {
551 case j != len(s) && s[j] == '"':
552 c.state, j = stateCSSDqURL, j+1
553 case j != len(s) && s[j] == '\'':
554 c.state, j = stateCSSSqURL, j+1
555 default:
556 c.state = stateCSSURL
557 }
558 return c, j
559 }
560 case '/':
561 if i+1 < len(s) {
562 switch s[i+1] {
563 case '/':
564 c.state = stateCSSLineCmt
565 return c, i + 2
566 case '*':
567 c.state = stateCSSBlockCmt
568 return c, i + 2
569 }
570 }
571 case '"':
572 c.state = stateCSSDqStr
573 return c, i + 1
574 case '\'':
575 c.state = stateCSSSqStr
576 return c, i + 1
577 }
578 k = i + 1
579 }
580 }
581
582
583 func tCSSStr(c context, s []byte) (context, int) {
584 var endAndEsc string
585 switch c.state {
586 case stateCSSDqStr, stateCSSDqURL:
587 endAndEsc = `\"`
588 case stateCSSSqStr, stateCSSSqURL:
589 endAndEsc = `\'`
590 case stateCSSURL:
591
592
593 endAndEsc = "\\\t\n\f\r )"
594 default:
595 panic(c.state.String())
596 }
597
598 k := 0
599 for {
600 i := k + bytes.IndexAny(s[k:], endAndEsc)
601 if i < k {
602 c, nread := tURL(c, decodeCSS(s[k:]))
603 return c, k + nread
604 }
605 if s[i] == '\\' {
606 i++
607 if i == len(s) {
608 return context{
609 state: stateError,
610 err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
611 }, len(s)
612 }
613 } else {
614 c.state = stateCSS
615 return c, i + 1
616 }
617 c, _ = tURL(c, decodeCSS(s[:i+1]))
618 k = i + 1
619 }
620 }
621
622
623 func tError(c context, s []byte) (context, int) {
624 return c, len(s)
625 }
626
627
628 func tMetaContent(c context, s []byte) (context, int) {
629 for i := 0; i < len(s); i++ {
630 if i+3 <= len(s)-1 && bytes.Equal(bytes.ToLower(s[i:i+4]), []byte("url=")) {
631 c.state = stateMetaContentURL
632 return c, i + 4
633 }
634 }
635 return c, len(s)
636 }
637
638
639 func tMetaContentURL(c context, s []byte) (context, int) {
640 for i := 0; i < len(s); i++ {
641 if s[i] == ';' {
642 c.state = stateMetaContent
643 return c, i + 1
644 }
645 }
646 return c, len(s)
647 }
648
649
650
651
652
653 func eatAttrName(s []byte, i int) (int, *Error) {
654 for j := i; j < len(s); j++ {
655 switch s[j] {
656 case ' ', '\t', '\n', '\f', '\r', '=', '>':
657 return j, nil
658 case '\'', '"', '<':
659
660
661
662 return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
663 default:
664
665 }
666 }
667 return len(s), nil
668 }
669
670 var elementNameMap = map[string]element{
671 "script": elementScript,
672 "style": elementStyle,
673 "textarea": elementTextarea,
674 "title": elementTitle,
675 "meta": elementMeta,
676 }
677
678
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z' and maps no other byte
	// into that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
682
683
684 func asciiAlphaNum(c byte) bool {
685 return asciiAlpha(c) || '0' <= c && c <= '9'
686 }
687
688
689 func eatTagName(s []byte, i int) (int, element) {
690 if i == len(s) || !asciiAlpha(s[i]) {
691 return i, elementNone
692 }
693 j := i + 1
694 for j < len(s) {
695 x := s[j]
696 if asciiAlphaNum(x) {
697 j++
698 continue
699 }
700
701 if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
702 j += 2
703 continue
704 }
705 break
706 }
707 return j, elementNameMap[strings.ToLower(string(s[i:j]))]
708 }
709
710
// eatWhiteSpace returns the largest j such that s[i:j] consists only of
// space, tab, newline, form feed and carriage return bytes.
func eatWhiteSpace(s []byte, i int) int {
	for ; i < len(s); i++ {
		if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\f' && c != '\r' {
			return i
		}
	}
	return len(s)
}
722
View as plain text