Source file src/html/template/escape.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package template
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"html"
    11  	"internal/godebug"
    12  	"io"
    13  	"maps"
    14  	"regexp"
    15  	"text/template"
    16  	"text/template/parse"
    17  )
    18  
    19  // escapeTemplate rewrites the named template, which must be
    20  // associated with t, to guarantee that the output of any of the named
    21  // templates is properly escaped. If no error is returned, then the named templates have
    22  // been modified. Otherwise the named templates have been rendered
    23  // unusable.
    24  func escapeTemplate(tmpl *Template, node parse.Node, name string) error {
    25  	c, _ := tmpl.esc.escapeTree(context{}, node, name, 0)
    26  	var err error
    27  	if c.err != nil {
    28  		err, c.err.Name = c.err, name
    29  	} else if c.state != stateText {
    30  		err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)}
    31  	}
    32  	if err != nil {
    33  		// Prevent execution of unsafe templates.
    34  		if t := tmpl.set[name]; t != nil {
    35  			t.escapeErr = err
    36  			t.text.Tree = nil
    37  			t.Tree = nil
    38  		}
    39  		return err
    40  	}
    41  	tmpl.esc.commit()
    42  	if t := tmpl.set[name]; t != nil {
    43  		t.escapeErr = escapeOK
    44  		t.Tree = t.text.Tree
    45  	}
    46  	return nil
    47  }
    48  
    49  // evalArgs formats the list of arguments into a string. It is equivalent to
    50  // fmt.Sprint(args...), except that it dereferences all pointers.
    51  func evalArgs(args ...any) string {
    52  	// Optimization for simple common case of a single string argument.
    53  	if len(args) == 1 {
    54  		if s, ok := args[0].(string); ok {
    55  			return s
    56  		}
    57  	}
    58  	for i, arg := range args {
    59  		args[i] = indirectToStringerOrError(arg)
    60  	}
    61  	return fmt.Sprint(args...)
    62  }
    63  
// funcMap maps command names to functions that render their inputs safe.
// The "_html_template_*" keys are the identifiers that escapeAction selects
// for insertion into action pipelines. "_eval_args_" is the helper that
// ensurePipelineContains substitutes for a predefined escaper that was
// invoked with explicit arguments.
var funcMap = template.FuncMap{
	"_html_template_attrescaper":      attrEscaper,
	"_html_template_commentescaper":   commentEscaper,
	"_html_template_cssescaper":       cssEscaper,
	"_html_template_cssvaluefilter":   cssValueFilter,
	"_html_template_htmlnamefilter":   htmlNameFilter,
	"_html_template_htmlescaper":      htmlEscaper,
	"_html_template_jsregexpescaper":  jsRegexpEscaper,
	"_html_template_jsstrescaper":     jsStrEscaper,
	"_html_template_jstmpllitescaper": jsTmplLitEscaper,
	"_html_template_jsvalescaper":     jsValEscaper,
	"_html_template_nospaceescaper":   htmlNospaceEscaper,
	"_html_template_rcdataescaper":    rcdataEscaper,
	"_html_template_srcsetescaper":    srcsetFilterAndEscaper,
	"_html_template_urlescaper":       urlEscaper,
	"_html_template_urlfilter":        urlFilter,
	"_html_template_urlnormalizer":    urlNormalizer,
	"_eval_args_":                     evalArgs,
}
    84  
// escaper collects type inferences about templates and changes needed to make
// templates injection safe. Changes are buffered in the xxxNodeEdits maps
// instead of being applied to the parse trees as they are discovered.
type escaper struct {
	// ns is the nameSpace that this escaper is associated with.
	ns *nameSpace
	// output[templateName] is the output context for a templateName that
	// has been mangled to include its input context.
	output map[string]context
	// derived[c.mangle(name)] is the template derived from the template
	// called name for the start context c.
	derived map[string]*template.Template
	// called is the set of mangled template names for which escapeTree
	// has been invoked.
	called map[string]bool
	// xxxNodeEdits are the accumulated edits to apply during commit.
	// Such edits are not applied immediately in case a template set
	// executes a given template in different escaping contexts.
	actionNodeEdits   map[*parse.ActionNode][]string
	templateNodeEdits map[*parse.TemplateNode]string
	textNodeEdits     map[*parse.TextNode][]byte
	// rangeContext holds context about the current range loop.
	// It is nil when no range loop is being escaped.
	rangeContext *rangeContext
}
   107  
// rangeContext holds information about the current range loop.
// escapeBranch pushes a fresh rangeContext (linked to the previous one via
// outer) before escaping a range body and pops it afterwards; escape appends
// to breaks and continues when it meets the corresponding nodes.
type rangeContext struct {
	outer     *rangeContext // outer loop
	breaks    []context     // context at each break action
	continues []context     // context at each continue action
}
   114  
   115  // makeEscaper creates a blank escaper for the given set.
   116  func makeEscaper(n *nameSpace) escaper {
   117  	return escaper{
   118  		n,
   119  		map[string]context{},
   120  		map[string]*template.Template{},
   121  		map[string]bool{},
   122  		map[*parse.ActionNode][]string{},
   123  		map[*parse.TemplateNode]string{},
   124  		map[*parse.TextNode][]byte{},
   125  		nil,
   126  	}
   127  }
   128  
// filterFailsafe is an innocuous word that is emitted in place of unsafe values
// by sanitizer functions. It is not a keyword in any programming language,
// contains no special characters, is not empty, and when it appears in output
// it is distinct enough that a developer can find the source of the problem
// via a search engine.
//
// The sanitizers that return it are not part of this section of the file.
const filterFailsafe = "ZgotmplZ"
   135  
   136  // escape escapes a template node.
   137  func (e *escaper) escape(c context, n parse.Node) context {
   138  	switch n := n.(type) {
   139  	case *parse.ActionNode:
   140  		return e.escapeAction(c, n)
   141  	case *parse.BreakNode:
   142  		c.n = n
   143  		e.rangeContext.breaks = append(e.rangeContext.breaks, c)
   144  		return context{state: stateDead}
   145  	case *parse.CommentNode:
   146  		return c
   147  	case *parse.ContinueNode:
   148  		c.n = n
   149  		e.rangeContext.continues = append(e.rangeContext.continues, c)
   150  		return context{state: stateDead}
   151  	case *parse.IfNode:
   152  		return e.escapeBranch(c, &n.BranchNode, "if")
   153  	case *parse.ListNode:
   154  		return e.escapeList(c, n)
   155  	case *parse.RangeNode:
   156  		return e.escapeBranch(c, &n.BranchNode, "range")
   157  	case *parse.TemplateNode:
   158  		return e.escapeTemplate(c, n)
   159  	case *parse.TextNode:
   160  		return e.escapeText(c, n)
   161  	case *parse.WithNode:
   162  		return e.escapeBranch(c, &n.BranchNode, "with")
   163  	}
   164  	panic("escaping " + n.String() + " is unimplemented")
   165  }
   166  
// debugAllowActionJSTmpl is the godebug setting named "jstmpllitinterp".
// It is not consulted in this section of the file.
var debugAllowActionJSTmpl = godebug.New("jstmpllitinterp")

// htmlmetacontenturlescape is the godebug setting named
// "htmlmetacontenturlescape". escapeAction adds the URL filter to actions in
// stateMetaContentURL unless the setting's value is "0", in which case it
// records a non-default use instead.
var htmlmetacontenturlescape = godebug.New("htmlmetacontenturlescape")
   170  
// escapeAction escapes an action template node.
//
// Actions that only declare variables are left alone. Otherwise the context
// is nudged past any empty transitions, the pipeline is checked for misuse of
// predefined escapers, and the names of the escapers appropriate to the
// resulting state and attribute delimiter are recorded for n via
// e.editActionNode. The returned context is the one that follows the action;
// it is an error context if the pipeline is rejected or the action sits in an
// ambiguous URL position.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
	if len(n.Pipe.Decl) != 0 {
		// A local variable assignment, not an interpolation.
		return c
	}
	c = nudge(c)
	// Check for disallowed use of predefined escapers in the pipeline.
	for pos, idNode := range n.Pipe.Cmds {
		node, ok := idNode.Args[0].(*parse.IdentifierNode)
		if !ok {
			// A predefined escaper "esc" will never be found as an identifier in a
			// Chain or Field node, since:
			// - "esc.x ..." is invalid, since predefined escapers return strings, and
			//   strings do not have methods, keys or fields.
			// - "... .esc" is invalid, since predefined escapers are global functions,
			//   not methods or fields of any types.
			// Therefore, it is safe to ignore these two node types.
			continue
		}
		ident := node.Ident
		if _, ok := predefinedEscapers[ident]; ok {
			// A predefined escaper is rejected unless it is the final command,
			// and "html" is additionally rejected in an unquoted attribute value.
			if pos < len(n.Pipe.Cmds)-1 ||
				c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
				return context{
					state: stateError,
					err:   errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
				}
			}
		}
	}
	// s accumulates, in order, the names of the escapers to add to the pipeline.
	s := make([]string, 0, 3)
	switch c.state {
	case stateError:
		return c
	case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
		switch c.urlPart {
		case urlPartNone:
			s = append(s, "_html_template_urlfilter")
			// The start of a URL is filtered and then also treated like the
			// pre-query part below.
			fallthrough
		case urlPartPreQuery:
			switch c.state {
			case stateCSSDqStr, stateCSSSqStr:
				s = append(s, "_html_template_cssescaper")
			default:
				s = append(s, "_html_template_urlnormalizer")
			}
		case urlPartQueryOrFrag:
			s = append(s, "_html_template_urlescaper")
		case urlPartUnknown:
			return context{
				state: stateError,
				err:   errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
			}
		default:
			panic(c.urlPart.String())
		}
	case stateMetaContent:
		// Handled below in delim check.
	case stateMetaContentURL:
		if htmlmetacontenturlescape.Value() != "0" {
			s = append(s, "_html_template_urlfilter")
		} else {
			// We don't have a great place to increment this, since it's hard to
			// know if we actually escape any urls in _html_template_urlfilter,
			// since it has no information about what context it is being
			// executed in etc. This is probably the best we can do.
			htmlmetacontenturlescape.IncNonDefault()
		}
	case stateJS:
		s = append(s, "_html_template_jsvalescaper")
		// A slash after a value starts a div operator.
		c.jsCtx = jsCtxDivOp
	case stateJSDqStr, stateJSSqStr:
		s = append(s, "_html_template_jsstrescaper")
	case stateJSTmplLit:
		s = append(s, "_html_template_jstmpllitescaper")
	case stateJSRegexp:
		s = append(s, "_html_template_jsregexpescaper")
	case stateCSS:
		s = append(s, "_html_template_cssvaluefilter")
	case stateText:
		s = append(s, "_html_template_htmlescaper")
	case stateRCDATA:
		s = append(s, "_html_template_rcdataescaper")
	case stateAttr:
		// Handled below in delim check.
	case stateAttrName, stateTag:
		c.state = stateAttrName
		s = append(s, "_html_template_htmlnamefilter")
	case stateSrcset:
		s = append(s, "_html_template_srcsetescaper")
	default:
		if isComment(c.state) {
			s = append(s, "_html_template_commentescaper")
		} else {
			panic("unexpected state " + c.state.String())
		}
	}
	// Add a final escaper that depends on how the enclosing attribute value,
	// if any, is delimited.
	switch c.delim {
	case delimNone:
		// No extra-escaping needed for raw text content.
	case delimSpaceOrTagEnd:
		s = append(s, "_html_template_nospaceescaper")
	default:
		s = append(s, "_html_template_attrescaper")
	}
	e.editActionNode(n, s)
	return c
}
   281  
// ensurePipelineContains ensures that the pipeline ends with the commands with
// the identifiers in s in order. If the pipeline ends with a predefined escaper
// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
//
// The function has no effect when s is empty. Otherwise p.Cmds is replaced by
// a newly allocated slice. s itself may be modified: entries equivalent to a
// trailing predefined escaper are overwritten with that escaper's name.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
	if len(s) == 0 {
		// Do not rewrite pipeline if we have no escapers to insert.
		return
	}
	// Precondition: p.Cmds contains at most one predefined escaper and the
	// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
	// always true because of the checks in escapeAction.
	//
	// pipelineLen is the number of leading commands of p.Cmds that are copied
	// unchanged into the rewritten pipeline.
	pipelineLen := len(p.Cmds)
	if pipelineLen > 0 {
		lastCmd := p.Cmds[pipelineLen-1]
		if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
			if esc := idNode.Ident; predefinedEscapers[esc] {
				// Pipeline ends with a predefined escaper.
				if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
					// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
					// where esc is the predefined escaper, and arg1...argN are its arguments.
					// Convert this into the equivalent form
					// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
					// merged with the escapers in s.
					lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
					p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
					pipelineLen++
				}
				// If any of the commands in s that we are about to insert is equivalent
				// to the predefined escaper, use the predefined escaper instead.
				dup := false
				for i, escaper := range s {
					if escFnsEq(esc, escaper) {
						s[i] = idNode.Ident
						dup = true
					}
				}
				if dup {
					// The predefined escaper will already be inserted along with the
					// escapers in s, so do not copy it to the rewritten pipeline.
					pipelineLen--
				}
			}
		}
	}
	// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
	newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
	// insertedIdents holds the normalized names of the identifier commands
	// that were carried over from the existing pipeline.
	insertedIdents := make(map[string]bool)
	for i := 0; i < pipelineLen; i++ {
		cmd := p.Cmds[i]
		newCmds[i] = cmd
		if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
			insertedIdents[normalizeEscFn(idNode.Ident)] = true
		}
	}
	for _, name := range s {
		if !insertedIdents[normalizeEscFn(name)] {
			// When two templates share an underlying parse tree via the use of
			// AddParseTree and one template is executed after the other, this check
			// ensures that escapers that were already inserted into the pipeline on
			// the first escaping pass do not get inserted again.
			newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
		}
	}
	p.Cmds = newCmds
}
   347  
// predefinedEscapers contains template predefined escapers that are equivalent
// to some contextual escapers. Keep in sync with equivEscapers.
//
// escapeAction rejects a pipeline in which one of these appears anywhere but
// in the final command, and ensurePipelineContains merges a trailing one with
// the contextual escapers it inserts.
var predefinedEscapers = map[string]bool{
	"html":     true,
	"urlquery": true,
}
   354  
// equivEscapers matches contextual escapers to equivalent predefined
// template escapers. normalizeEscFn uses it to map a contextual escaper name
// to its predefined counterpart.
var equivEscapers = map[string]string{
	// The following pairs of HTML escapers provide equivalent security
	// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_attrescaper":   "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_rcdataescaper": "html",
	// These two URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986 Section
	// 2.2
	"_html_template_urlescaper": "urlquery",
	// These two functions are not actually equivalent; urlquery is stricter as it
	// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
	// does not. It is therefore only safe to replace _html_template_urlnormalizer
	// with urlquery (this happens in ensurePipelineContains), but not the other
	// way around. We keep this entry around to preserve the behavior of templates
	// written before Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",
}
   375  
   376  // escFnsEq reports whether the two escaping functions are equivalent.
   377  func escFnsEq(a, b string) bool {
   378  	return normalizeEscFn(a) == normalizeEscFn(b)
   379  }
   380  
   381  // normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of
   382  // escaper functions a and b that are equivalent.
   383  func normalizeEscFn(e string) string {
   384  	if norm := equivEscapers[e]; norm != "" {
   385  		return norm
   386  	}
   387  	return e
   388  }
   389  
// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
//
// appendCmd consults it to avoid appending b to a pipeline whose last command
// is a.
var redundantFuncs = map[string]map[string]bool{
	"_html_template_commentescaper": {
		"_html_template_attrescaper": true,
		"_html_template_htmlescaper": true,
	},
	"_html_template_cssescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsregexpescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsstrescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jstmpllitescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_urlescaper": {
		"_html_template_urlnormalizer": true,
	},
}
   413  
   414  // appendCmd appends the given command to the end of the command pipeline
   415  // unless it is redundant with the last command.
   416  func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
   417  	if n := len(cmds); n != 0 {
   418  		last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
   419  		next, okNext := cmd.Args[0].(*parse.IdentifierNode)
   420  		if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
   421  			return cmds
   422  		}
   423  	}
   424  	return append(cmds, cmd)
   425  }
   426  
   427  // newIdentCmd produces a command containing a single identifier node.
   428  func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
   429  	return &parse.CommandNode{
   430  		NodeType: parse.NodeCommand,
   431  		Args:     []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
   432  	}
   433  }
   434  
   435  // nudge returns the context that would result from following empty string
   436  // transitions from the input context.
   437  // For example, parsing:
   438  //
   439  //	`<a href=`
   440  //
   441  // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
   442  //
   443  //	`<a href=x`
   444  //
   445  // will end in context{stateURL, delimSpaceOrTagEnd, ...}.
   446  // There are two transitions that happen when the 'x' is seen:
   447  // (1) Transition from a before-value state to a start-of-value state without
   448  //
   449  //	consuming any character.
   450  //
   451  // (2) Consume 'x' and transition past the first value character.
   452  // In this case, nudging produces the context after (1) happens.
   453  func nudge(c context) context {
   454  	switch c.state {
   455  	case stateTag:
   456  		// In `<foo {{.}}`, the action should emit an attribute.
   457  		c.state = stateAttrName
   458  	case stateBeforeValue:
   459  		// In `<foo bar={{.}}`, the action is an undelimited value.
   460  		c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
   461  	case stateAfterName:
   462  		// In `<foo bar {{.}}`, the action is an attribute name.
   463  		c.state, c.attr = stateAttrName, attrNone
   464  	}
   465  	return c
   466  }
   467  
   468  // join joins the two contexts of a branch template node. The result is an
   469  // error context if either of the input contexts are error contexts, or if the
   470  // input contexts differ.
   471  func join(a, b context, node parse.Node, nodeName string) context {
   472  	if a.state == stateError {
   473  		return a
   474  	}
   475  	if b.state == stateError {
   476  		return b
   477  	}
   478  	if a.state == stateDead {
   479  		return b
   480  	}
   481  	if b.state == stateDead {
   482  		return a
   483  	}
   484  	if a.eq(b) {
   485  		return a
   486  	}
   487  
   488  	c := a
   489  	c.urlPart = b.urlPart
   490  	if c.eq(b) {
   491  		// The contexts differ only by urlPart.
   492  		c.urlPart = urlPartUnknown
   493  		return c
   494  	}
   495  
   496  	c = a
   497  	c.jsCtx = b.jsCtx
   498  	if c.eq(b) {
   499  		// The contexts differ only by jsCtx.
   500  		c.jsCtx = jsCtxUnknown
   501  		return c
   502  	}
   503  
   504  	// Allow a nudged context to join with an unnudged one.
   505  	// This means that
   506  	//   <p title={{if .C}}{{.}}{{end}}
   507  	// ends in an unquoted value state even though the else branch
   508  	// ends in stateBeforeValue.
   509  	if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
   510  		if e := join(c, d, node, nodeName); e.state != stateError {
   511  			return e
   512  		}
   513  	}
   514  
   515  	return context{
   516  		state: stateError,
   517  		err:   errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
   518  	}
   519  }
   520  
   521  // escapeBranch escapes a branch template node: "if", "range" and "with".
   522  func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
   523  	if nodeName == "range" {
   524  		e.rangeContext = &rangeContext{outer: e.rangeContext}
   525  	}
   526  	c0 := e.escapeList(c, n.List)
   527  	if nodeName == "range" {
   528  		if c0.state != stateError {
   529  			c0 = joinRange(c0, e.rangeContext)
   530  		}
   531  		e.rangeContext = e.rangeContext.outer
   532  		if c0.state == stateError {
   533  			return c0
   534  		}
   535  
   536  		// The "true" branch of a "range" node can execute multiple times.
   537  		// We check that executing n.List once results in the same context
   538  		// as executing n.List twice.
   539  		e.rangeContext = &rangeContext{outer: e.rangeContext}
   540  		c1, _ := e.escapeListConditionally(c0, n.List, nil)
   541  		c0 = join(c0, c1, n, nodeName)
   542  		if c0.state == stateError {
   543  			e.rangeContext = e.rangeContext.outer
   544  			// Make clear that this is a problem on loop re-entry
   545  			// since developers tend to overlook that branch when
   546  			// debugging templates.
   547  			c0.err.Line = n.Line
   548  			c0.err.Description = "on range loop re-entry: " + c0.err.Description
   549  			return c0
   550  		}
   551  		c0 = joinRange(c0, e.rangeContext)
   552  		e.rangeContext = e.rangeContext.outer
   553  		if c0.state == stateError {
   554  			return c0
   555  		}
   556  	}
   557  	c1 := e.escapeList(c, n.ElseList)
   558  	return join(c0, c1, n, nodeName)
   559  }
   560  
   561  func joinRange(c0 context, rc *rangeContext) context {
   562  	// Merge contexts at break and continue statements into overall body context.
   563  	// In theory we could treat breaks differently from continues, but for now it is
   564  	// enough to treat them both as going back to the start of the loop (which may then stop).
   565  	for _, c := range rc.breaks {
   566  		c0 = join(c0, c, c.n, "range")
   567  		if c0.state == stateError {
   568  			c0.err.Line = c.n.(*parse.BreakNode).Line
   569  			c0.err.Description = "at range loop break: " + c0.err.Description
   570  			return c0
   571  		}
   572  	}
   573  	for _, c := range rc.continues {
   574  		c0 = join(c0, c, c.n, "range")
   575  		if c0.state == stateError {
   576  			c0.err.Line = c.n.(*parse.ContinueNode).Line
   577  			c0.err.Description = "at range loop continue: " + c0.err.Description
   578  			return c0
   579  		}
   580  	}
   581  	return c0
   582  }
   583  
   584  // escapeList escapes a list template node.
   585  func (e *escaper) escapeList(c context, n *parse.ListNode) context {
   586  	if n == nil {
   587  		return c
   588  	}
   589  	for _, m := range n.Nodes {
   590  		c = e.escape(c, m)
   591  		if c.state == stateDead {
   592  			break
   593  		}
   594  	}
   595  	return c
   596  }
   597  
   598  // escapeListConditionally escapes a list node but only preserves edits and
   599  // inferences in e if the inferences and output context satisfy filter.
   600  // It returns the best guess at an output context, and the result of the filter
   601  // which is the same as whether e was updated.
   602  func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
   603  	e1 := makeEscaper(e.ns)
   604  	e1.rangeContext = e.rangeContext
   605  	// Make type inferences available to f.
   606  	maps.Copy(e1.output, e.output)
   607  	c = e1.escapeList(c, n)
   608  	ok := filter != nil && filter(&e1, c)
   609  	if ok {
   610  		// Copy inferences and edits from e1 back into e.
   611  		maps.Copy(e.output, e1.output)
   612  		maps.Copy(e.derived, e1.derived)
   613  		maps.Copy(e.called, e1.called)
   614  		for k, v := range e1.actionNodeEdits {
   615  			e.editActionNode(k, v)
   616  		}
   617  		for k, v := range e1.templateNodeEdits {
   618  			e.editTemplateNode(k, v)
   619  		}
   620  		for k, v := range e1.textNodeEdits {
   621  			e.editTextNode(k, v)
   622  		}
   623  	}
   624  	return c, ok
   625  }
   626  
   627  // escapeTemplate escapes a {{template}} call node.
   628  func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
   629  	c, name := e.escapeTree(c, n, n.Name, n.Line)
   630  	if name != n.Name {
   631  		e.editTemplateNode(n, name)
   632  	}
   633  	return c
   634  }
   635  
   636  // escapeTree escapes the named template starting in the given context as
   637  // necessary and returns its output context.
   638  func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) {
   639  	// Mangle the template name with the input context to produce a reliable
   640  	// identifier.
   641  	dname := c.mangle(name)
   642  	e.called[dname] = true
   643  	if out, ok := e.output[dname]; ok {
   644  		// Already escaped.
   645  		return out, dname
   646  	}
   647  	t := e.template(name)
   648  	if t == nil {
   649  		// Two cases: The template exists but is empty, or has never been mentioned at
   650  		// all. Distinguish the cases in the error messages.
   651  		if e.ns.set[name] != nil {
   652  			return context{
   653  				state: stateError,
   654  				err:   errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name),
   655  			}, dname
   656  		}
   657  		return context{
   658  			state: stateError,
   659  			err:   errorf(ErrNoSuchTemplate, node, line, "no such template %q", name),
   660  		}, dname
   661  	}
   662  	if dname != name {
   663  		// Use any template derived during an earlier call to escapeTemplate
   664  		// with different top level templates, or clone if necessary.
   665  		dt := e.template(dname)
   666  		if dt == nil {
   667  			dt = template.New(dname)
   668  			dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()}
   669  			e.derived[dname] = dt
   670  		}
   671  		t = dt
   672  	}
   673  	return e.computeOutCtx(c, t), dname
   674  }
   675  
   676  // computeOutCtx takes a template and its start context and computes the output
   677  // context while storing any inferences in e.
   678  func (e *escaper) computeOutCtx(c context, t *template.Template) context {
   679  	// Propagate context over the body.
   680  	c1, ok := e.escapeTemplateBody(c, t)
   681  	if !ok {
   682  		// Look for a fixed point by assuming c1 as the output context.
   683  		if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 {
   684  			c1, ok = c2, true
   685  		}
   686  		// Use c1 as the error context if neither assumption worked.
   687  	}
   688  	if !ok && c1.state != stateError {
   689  		return context{
   690  			state: stateError,
   691  			err:   errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()),
   692  		}
   693  	}
   694  	return c1
   695  }
   696  
   697  // escapeTemplateBody escapes the given template assuming the given output
   698  // context, and returns the best guess at the output context and whether the
   699  // assumption was correct.
   700  func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) {
   701  	filter := func(e1 *escaper, c1 context) bool {
   702  		if c1.state == stateError {
   703  			// Do not update the input escaper, e.
   704  			return false
   705  		}
   706  		if !e1.called[t.Name()] {
   707  			// If t is not recursively called, then c1 is an
   708  			// accurate output context.
   709  			return true
   710  		}
   711  		// c1 is accurate if it matches our assumed output context.
   712  		return c.eq(c1)
   713  	}
   714  	// We need to assume an output context so that recursive template calls
   715  	// take the fast path out of escapeTree instead of infinitely recurring.
   716  	// Naively assuming that the input context is the same as the output
   717  	// works >90% of the time.
   718  	e.output[t.Name()] = c
   719  	return e.escapeListConditionally(c, t.Tree.Root, filter)
   720  }
   721  
// delimEnds maps each delim to a string of characters that terminate it.
// The array is indexed by delim value; a delim with no entry below
// (presumably delimNone — confirm against the delim constants) maps to the
// empty string.
var delimEnds = [...]string{
	delimDoubleQuote: `"`,
	delimSingleQuote: "'",
	// Determined empirically by running the below in various browsers.
	// var div = document.createElement("DIV");
	// for (var i = 0; i < 0x10000; ++i) {
	//   div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
	//   if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
	//     document.write("<p>U+" + i.toString(16));
	// }
	delimSpaceOrTagEnd: " \t\n\f\r>",
}
   735  
   736  var (
   737  	// Per WHATWG HTML specification, section 4.12.1.3, there are extremely
   738  	// complicated rules for how to handle the set of opening tags <!--,
   739  	// <script, and </script when they appear in JS literals (i.e. strings,
   740  	// regexs, and comments). The specification suggests a simple solution,
   741  	// rather than implementing the arcane ABNF, which involves simply escaping
   742  	// the opening bracket with \x3C. We use the below regex for this, since it
   743  	// makes doing the case-insensitive find-replace much simpler.
   744  	specialScriptTagRE          = regexp.MustCompile("(?i)<(script|/script|!--)")
   745  	specialScriptTagReplacement = []byte("\\x3C$1")
   746  )
   747  
   748  func containsSpecialScriptTag(s []byte) bool {
   749  	return specialScriptTagRE.Match(s)
   750  }
   751  
   752  func escapeSpecialScriptTags(s []byte) []byte {
   753  	return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
   754  }
   755  
// doctypeBytes is the upper-case prefix that escapeText compares against
// (after upper-casing the text) to recognise a '<' that begins a DOCTYPE
// declaration, which it leaves unescaped.
var doctypeBytes = []byte("<!DOCTYPE")
   757  
   758  // escapeText escapes a text template node.
   759  func (e *escaper) escapeText(c context, n *parse.TextNode) context {
   760  	s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
   761  	for i != len(s) {
   762  		c1, nread := contextAfterText(c, s[i:])
   763  		i1 := i + nread
   764  		if c.state == stateText || c.state == stateRCDATA {
   765  			end := i1
   766  			if c1.state != c.state {
   767  				for j := end - 1; j >= i; j-- {
   768  					if s[j] == '<' {
   769  						end = j
   770  						break
   771  					}
   772  				}
   773  			}
   774  			for j := i; j < end; j++ {
   775  				if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) {
   776  					b.Write(s[written:j])
   777  					b.WriteString("&lt;")
   778  					written = j + 1
   779  				}
   780  			}
   781  		} else if isComment(c.state) && c.delim == delimNone {
   782  			switch c.state {
   783  			case stateJSBlockCmt:
   784  				// https://es5.github.io/#x7.4:
   785  				// "Comments behave like white space and are
   786  				// discarded except that, if a MultiLineComment
   787  				// contains a line terminator character, then
   788  				// the entire comment is considered to be a
   789  				// LineTerminator for purposes of parsing by
   790  				// the syntactic grammar."
   791  				if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") {
   792  					b.WriteByte('\n')
   793  				} else {
   794  					b.WriteByte(' ')
   795  				}
   796  			case stateCSSBlockCmt:
   797  				b.WriteByte(' ')
   798  			}
   799  			written = i1
   800  		}
   801  		if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone {
   802  			// Preserve the portion between written and the comment start.
   803  			cs := i1 - 2
   804  			if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt {
   805  				// "<!--" instead of "/*" or "//"
   806  				cs -= 2
   807  			} else if c1.state == stateJSHTMLCloseCmt {
   808  				// "-->" instead of "/*" or "//"
   809  				cs -= 1
   810  			}
   811  			b.Write(s[written:cs])
   812  			written = i1
   813  		}
   814  		if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
   815  			b.Write(s[written:i])
   816  			b.Write(escapeSpecialScriptTags(s[i:i1]))
   817  			written = i1
   818  		}
   819  		if i == i1 && c.state == c1.state {
   820  			panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
   821  		}
   822  		c, i = c1, i1
   823  	}
   824  
   825  	if written != 0 && c.state != stateError {
   826  		if !isComment(c.state) || c.delim != delimNone {
   827  			b.Write(n.Text[written:])
   828  		}
   829  		e.editTextNode(n, b.Bytes())
   830  	}
   831  	return c
   832  }
   833  
   834  // contextAfterText starts in context c, consumes some tokens from the front of
   835  // s, then returns the context after those tokens and the unprocessed suffix.
   836  func contextAfterText(c context, s []byte) (context, int) {
   837  	if c.delim == delimNone {
   838  		c1, i := tSpecialTagEnd(c, s)
   839  		if i == 0 {
   840  			// A special end tag (`</script>`) has been seen and
   841  			// all content preceding it has been consumed.
   842  			return c1, 0
   843  		}
   844  		// Consider all content up to any end tag.
   845  		return transitionFunc[c.state](c, s[:i])
   846  	}
   847  
   848  	// We are at the beginning of an attribute value.
   849  
   850  	i := bytes.IndexAny(s, delimEnds[c.delim])
   851  	if i == -1 {
   852  		i = len(s)
   853  	}
   854  	if c.delim == delimSpaceOrTagEnd {
   855  		// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
   856  		// lists the runes below as error characters.
   857  		// Error out because HTML parsers may differ on whether
   858  		// "<a id= onclick=f("     ends inside id's or onclick's value,
   859  		// "<a class=`foo "        ends inside a value,
   860  		// "<a style=font:'Arial'" needs open-quote fixup.
   861  		// IE treats '`' as a quotation character.
   862  		if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
   863  			return context{
   864  				state: stateError,
   865  				err:   errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
   866  			}, len(s)
   867  		}
   868  	}
   869  	if i == len(s) {
   870  		// Remain inside the attribute.
   871  		// Decode the value so non-HTML rules can easily handle
   872  		//     <button onclick="alert(&quot;Hi!&quot;)">
   873  		// without having to entity decode token boundaries.
   874  		for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
   875  			c1, i1 := transitionFunc[c.state](c, u)
   876  			c, u = c1, u[i1:]
   877  		}
   878  		return c, len(s)
   879  	}
   880  
   881  	element := c.element
   882  
   883  	// If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS.
   884  	if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) {
   885  		element = elementNone
   886  	}
   887  
   888  	if c.delim != delimSpaceOrTagEnd {
   889  		// Consume any quote.
   890  		i++
   891  	}
   892  	// On exiting an attribute, we discard all state information
   893  	// except the state and element.
   894  	return context{state: stateTag, element: element}, i
   895  }
   896  
   897  // editActionNode records a change to an action pipeline for later commit.
   898  func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) {
   899  	if _, ok := e.actionNodeEdits[n]; ok {
   900  		panic(fmt.Sprintf("node %s shared between templates", n))
   901  	}
   902  	e.actionNodeEdits[n] = cmds
   903  }
   904  
   905  // editTemplateNode records a change to a {{template}} callee for later commit.
   906  func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) {
   907  	if _, ok := e.templateNodeEdits[n]; ok {
   908  		panic(fmt.Sprintf("node %s shared between templates", n))
   909  	}
   910  	e.templateNodeEdits[n] = callee
   911  }
   912  
   913  // editTextNode records a change to a text node for later commit.
   914  func (e *escaper) editTextNode(n *parse.TextNode, text []byte) {
   915  	if _, ok := e.textNodeEdits[n]; ok {
   916  		panic(fmt.Sprintf("node %s shared between templates", n))
   917  	}
   918  	e.textNodeEdits[n] = text
   919  }
   920  
   921  // commit applies changes to actions and template calls needed to contextually
   922  // autoescape content and adds any derived templates to the set.
   923  func (e *escaper) commit() {
   924  	for name := range e.output {
   925  		e.template(name).Funcs(funcMap)
   926  	}
   927  	// Any template from the name space associated with this escaper can be used
   928  	// to add derived templates to the underlying text/template name space.
   929  	tmpl := e.arbitraryTemplate()
   930  	for _, t := range e.derived {
   931  		if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil {
   932  			panic("error adding derived template")
   933  		}
   934  	}
   935  	for n, s := range e.actionNodeEdits {
   936  		ensurePipelineContains(n.Pipe, s)
   937  	}
   938  	for n, name := range e.templateNodeEdits {
   939  		n.Name = name
   940  	}
   941  	for n, s := range e.textNodeEdits {
   942  		n.Text = s
   943  	}
   944  	// Reset state that is specific to this commit so that the same changes are
   945  	// not re-applied to the template on subsequent calls to commit.
   946  	e.called = make(map[string]bool)
   947  	e.actionNodeEdits = make(map[*parse.ActionNode][]string)
   948  	e.templateNodeEdits = make(map[*parse.TemplateNode]string)
   949  	e.textNodeEdits = make(map[*parse.TextNode][]byte)
   950  }
   951  
   952  // template returns the named template given a mangled template name.
   953  func (e *escaper) template(name string) *template.Template {
   954  	// Any template from the name space associated with this escaper can be used
   955  	// to look up templates in the underlying text/template name space.
   956  	t := e.arbitraryTemplate().text.Lookup(name)
   957  	if t == nil {
   958  		t = e.derived[name]
   959  	}
   960  	return t
   961  }
   962  
   963  // arbitraryTemplate returns an arbitrary template from the name space
   964  // associated with e and panics if no templates are found.
   965  func (e *escaper) arbitraryTemplate() *Template {
   966  	for _, t := range e.ns.set {
   967  		return t
   968  	}
   969  	panic("no templates in name space")
   970  }
   971  
   972  // Forwarding functions so that clients need only import this package
   973  // to reach the general escaping functions of text/template.
   974  
// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
// It forwards directly to HTMLEscape in text/template.
func HTMLEscape(w io.Writer, b []byte) {
	template.HTMLEscape(w, b)
}
   979  
   980  // HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
   981  func HTMLEscapeString(s string) string {
   982  	return template.HTMLEscapeString(s)
   983  }
   984  
   985  // HTMLEscaper returns the escaped HTML equivalent of the textual
   986  // representation of its arguments.
   987  func HTMLEscaper(args ...any) string {
   988  	return template.HTMLEscaper(args...)
   989  }
   990  
// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
// It forwards directly to JSEscape in text/template.
func JSEscape(w io.Writer, b []byte) {
	template.JSEscape(w, b)
}
   995  
   996  // JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
   997  func JSEscapeString(s string) string {
   998  	return template.JSEscapeString(s)
   999  }
  1000  
  1001  // JSEscaper returns the escaped JavaScript equivalent of the textual
  1002  // representation of its arguments.
  1003  func JSEscaper(args ...any) string {
  1004  	return template.JSEscaper(args...)
  1005  }
  1006  
  1007  // URLQueryEscaper returns the escaped value of the textual representation of
  1008  // its arguments in a form suitable for embedding in a URL query.
  1009  func URLQueryEscaper(args ...any) string {
  1010  	return template.URLQueryEscaper(args...)
  1011  }
  1012  

View as plain text