Monday, November 08, 2010 at 12:03 AM.
system.verbs.builtins.html.neuterJavaScript
on neuterJavaScript (s, legalProtocolSchemes=nil) {
	<<Neuter JavaScript in a string; neuter <script> tags, event handlers, and javascript: (and similar) protocols.
		<<Parameters:
			<<s -- the HTML text to filter.
			<<legalProtocolSchemes -- optional list of lower-case protocol scheme names (without the colon) that are allowed in URL-bearing attributes. If nil, the built-in default list below is used; a caller-supplied list replaces the default entirely.
		<<Returns: the filtered string.
			<<<script> and </script> tags have their opening < replaced with the entity &lt; so they are displayed rather than interpreted.
			<<on-event attributes (a whitespace character followed by "on" and a letter) are removed from tags, together with their values.
			<<A tag with an action, background, code, codebase, data, datasrc, href, lowsrc or src attribute whose protocol scheme is not in legalProtocolSchemes has its opening < replaced with &lt;.
			<<HTML comments (text starting with <!--) are not scanned.
	<<Changes:
		<<2/10/02; 11:51:27 PM by JES
			<<Fixed a bug where HTML attribute values which contained the string "on" would cause incorrect neutering of the text. Fixed a bug where if there were more than one on-event attribute, only the first would be neutered.
		<<12/28/01; 4:20:30 AM by JES
			<<Fixed a bug where HTML comments which contained the string "on" would not be closed properly, resulting in incorrectly formatted HTML.
		<<2/23/01; 7:40:23 PM by JES
			<<Fixed a bug where an un-closed tag would cause a script error, array index out of range.
		<<2/20/01; 4:52:06 PM by PBS
			<<Optimization: fast return if the string does not contain a < character.
		<<2/20/01; 4:33:17 PM by JES
			<<Fixed a bug where quote characters inside event handlers could potentially allow arbitrary text to pass through this filter.
		<<2/20/01; 12:23:53 PM by JES
			<<Instead of neutering event handlers by inserting an 'x' character, remove event handlers completely.
			<<Neuter illegal protocol schemes in all applicable attributes, not just href's.
			<<Instead of neutering attributes when the protocol scheme is illegal, neuter the entire tag by replacing the opening < with &lt;.
		<<2/19/01; 5:26:22 PM by PBS
			<<Removed flNeuterEventHandlers and flNeuterScriptTags parameters.
			<<Made legalProtocolSchemes an optional parameter. If not supplied, use the default list. The caller can either not supply it or over-ride it entirely.
		<<2/19/01; 2:29:55 PM by JES
			<<Neuter all illegal protocol schemes from <a href> tags.
		<<2/17/01; 1:24:12 AM by JES
			<<A whitespace character may be a space, a carriage return, or a linefeed character -- not just a space. Neuter <a href's which are javascript: protocol links, i.e. <a href="javascript:...>...</a>
		<<2/16/01; 9:06:43 PM by PBS
			<<Created.
	if not (s contains "<") { //fast return if no < character
		return (s)};
	local (ix = 1); //index of the character currently being examined as a possible tag start
	local (whitespaceChars = {' ', '\t', '\r', '\n'});
	if legalProtocolSchemes == nil { //if nil, use default list
		legalProtocolSchemes = {"http", "https", "ftp", "mailto", "file", "gopher", "news", "nntp", "telnet", "wais", "prospero", "nfs", "afs", "at", "binhex", "binhex4", "finger", "help", "about", "mac-binary", "mac-binhex", "mac-binhex40", "macbinary", "netphone", "nsl_neighborhood", "octet-stream", "pnm", "rtsp", "uue", "whois", "x-binary", "x-binhex40", "x-compress", "x-mac-binhex40", "x-macbinary", "x-netphone", "x-nsl_neighborhood", "x-sit", "x-stuffix", "x-tar", "zip"}};
	while ix <= sizeof (s) {
		if s[ix] == '<' and (string.mid (s, ix, 4) != "<!--") { //a tag start which is not an HTML comment
			local (i, ct);
			local (flinquotes = false, flinsinglequotes = false, flindoublequotes = false);
			for i = ix to sizeof (s) { //scan forward through the tag
				if ((s [i] == '>') and (not flinquotes)) or (i == sizeof (s)) { //end of the tag, or end of the string for an un-closed tag
					bundle { //neuter script tags
						local (tagname = string.mid (s, ix + 1, i - ix - 1));
						if tagname beginswith "/" { //closing tags are neutered too
							tagname = string.delete (tagname, 1, 1)};
						tagname = string.trimWhiteSpace (tagname);
						if tagname contains ' ' { //ignore any attributes
							tagname = string.nthfield (tagname, ' ', 1)};
						if string.lower (tagname) == "script" {
							s = string.delete (s, ix, 1); //delete the <
							s = string.insert ("&lt;", s, ix)}; //replace with the &lt; entity -- inserting a literal < here would put the tag straight back
						break}};
				<<track whether position i is inside a quoted attribute value, so that a > inside quotes does not end the tag
				if s [i] == '\'' {
					flinsinglequotes = not flinsinglequotes};
				if s [i] == '\"' {
					flindoublequotes = not flindoublequotes};
				flinquotes = flinsinglequotes or flindoublequotes;
				ct = sizeOf (s);
				bundle { //neuter event handlers
					if i < ct - 3 {
						if whitespaceChars contains s[i] { //02/17/01 JES: whitespace can be \t, \r or \n
							if string.lower (string.mid (s, i + 1, 2)) == "on" { //an event handler?
								if string.isAlpha (string.mid (s, i + 3, 1)) { //"on" must be followed by a letter, so the plain word "on" is left alone
									local (eventend = i + 1);
									while eventend <= sizeOf (s) { //find the '=' character
										if s[eventend] == '=' {
											break};
										eventend++};
									eventend++;
									while eventend <= sizeOf (s) { //find the first non-whitespace character after the '='
										if not (whitespaceChars contains s[eventend]) {
											break};
										eventend++};
									local (quotechar); //stays nil when the attribute value is not quoted
									if eventend <= sizeOf (s) {
										if (s[eventend] == '\'') or (s[eventend] == '\"') {
											quotechar = s[eventend];
											eventend++}};
									while eventend <= sizeOf (s) { //find the end of the attribute value
										if quotechar != nil {
											if (s[eventend] == quotechar) {
												if (s[eventend - 1] != '\\') { //a backslash-escaped quote does not end the value
													eventend++;
													break}}}
										else {
											if (whitespaceChars contains s[eventend]) or (s[eventend] == '>') {
												break}};
										eventend++};
									s = string.delete (s, i, eventend - i); //remove the leading whitespace, the attribute name and its value
									ix--; //cancels the ix++ below, so the same tag is scanned again for further event handlers
									break}}}}};
				if i < ct - 5 { //02/19/01 JES: neuter illegal protocol schemes in all applicable attributes
					if whitespaceChars contains s[i] {
						local (checkString = string.lower (string.mid (s, i + 1, 10))); //the 10 characters following the whitespace
						case true {
							string.patternMatch ("action", checkString) > 0;
							string.patternMatch ("background", checkString) > 0;
							string.patternMatch ("code", checkString) > 0;
							string.patternMatch ("codebase", checkString) > 0;
							string.patternMatch ("data", checkString) > 0;
							string.patternMatch ("datasrc", checkString) > 0;
							string.patternMatch ("href", checkString) > 0;
							string.patternMatch ("lowsrc", checkString) > 0;
							string.patternMatch ("src", checkString) > 0 {
								local (schemestart = i + 1);
								while schemestart <= sizeOf (s) { //find the '=' character
									if s[schemestart] == '=' {
										break};
									schemestart++};
								schemestart++;
								while schemestart <= sizeOf (s) { //find the protocol scheme start
									if string.isAlpha (s[schemestart]) {
										break};
									case s[schemestart] {
										'_';
										'-' {
											break}};
									schemestart++};
								local (schemeend = schemestart, flFoundScheme = true);
								while schemeend <= sizeOf (s) {
									if s[schemeend] == ':' { //the protocol scheme ends with a colon
										break};
									case s[schemeend] { //other characters which would end the href attribute
										'/';
										'\"';
										'\'' {
											flFoundScheme = false;
											break}};
									schemeend++};
								if flFoundScheme {
									local (scheme = string.lower (string.mid (s, schemestart, schemeend - schemestart)));
									if not (legalProtocolSchemes contains scheme) {
										s = string.delete (s, ix, 1); //delete the <
										s = string.insert ("&lt;", s, ix); //replace with the &lt; entity, neutering the whole tag
										break}}}}}}}};
		ix++};
	return (s)}
<<bundle //test code -- each call is followed by its expected result
	<<html.neuterJavaScript ("<script type=\"text\">Foo</script>")
		<<"&lt;script type=\"text\">Foo&lt;/script>"
	<<html.neuterJavaScript ("\"<script type=\"text\">Foo</script>")
		<<"\"&lt;script type=\"text\">Foo&lt;/script>"
	<<html.neuterJavaScript ("< script type=\"text\">Foo</ script>")
		<<"&lt; script type=\"text\">Foo&lt;/ script>"
	<<html.neuterJavaScript ("Foobar \r\n< script type=\"text\">Foo</ script>")
		<<"Foobar \r\n&lt; script type=\"text\">Foo&lt;/ script>"
	<<html.neuterJavaScript ("<hr> <a href=\"http://foo.com/\" onmouseover='bar'>Foobar</a><p>\r\n< script type=\"text\">Foo</ script>")
		<<"<hr> <a href=\"http://foo.com/\">Foobar</a><p>\r\n&lt; script type=\"text\">Foo&lt;/ script>"
	<<html.neuterJavaScript ("<a onmouseout='foo'\">Bar</a>")
		<<"<a\">Bar</a>"
	<<html.neuterJavaScript ("<a onmouseout='foo>' onmouseover='bar'>Bar</a>")
		<<"<a>Bar</a>"
	<<html.neuterJavaScript ("<a onmouseout='foo'> onmouseover='bar' Bar</a>")
		<<"<a> onmouseover='bar' Bar</a>"
	<<html.neuterJavaScript ("<a href='onmouseover.html'> onmouseover='bar' Bar</a>")
		<<"<a href='onmouseover.html'> onmouseover='bar' Bar</a>"
	<<html.neuterJavaScript ("<a href=\"javascript:foo\">Bar</a>")
		<<"&lt;a href=\"javascript:foo\">Bar</a>"
	<<html.neuterJavaScript ("<img src=\"javascript:foo\">")
		<<"&lt;img src=\"javascript:foo\">"
	<<html.neuterJavaScript ("<a href=\"perlscript:foo\">Bar</a>")
		<<"&lt;a href=\"perlscript:foo\">Bar</a>"
	<<html.neuterJavaScript ("<a href=\" _some-other_script : foo \">Bar</a>")
		<<"&lt;a href=\" _some-other_script : foo \">Bar</a>"
	<<html.neuterJavaScript ("<a href=\" _some-other_script : foo \" style='bar:baz'>Bar</a>")
		<<"&lt;a href=\" _some-other_script : foo \" style='bar:baz'>Bar</a>"
	<<html.neuterJavaScript ("<a href=\"http://foo\">Bar</a>")
		<<"<a href=\"http://foo\">Bar</a>"
	<<html.neuterJavaScript ("<a href=\"/foo\">Bar</a>")
		<<"<a href=\"/foo\">Bar</a>"
	<<html.neuterJavaScript ("<a href=\"../foo\">Bar</a>")
		<<"<a href=\"../foo\">Bar</a>"
	<<html.neuterJavaScript ("<a href=\"foo\">Bar</a>")
		<<"<a href=\"foo\">Bar</a>"
	<<html.neuterJavaScript ("<a href='foo' style=baz:bam>Bar</a>")
		<<"<a href='foo' style=baz:bam>Bar</a>"
	<<html.neuterJavaScript ("<a href=\"foo\'\" onmouseover=\'foo\'>Bar</a>")
		<<"<a href=\"foo'\">Bar</a>"
	<<html.neuterJavaScript ("<a href=\"http://foo\'\" onmouseover=\'foo\'>Bar</a>", {"ftp", "mailto"})
		<<"&lt;a href=\"http://foo'\" onmouseover='foo'>Bar</a>"
	<<html.neuterJavaScript ("<hr onmouseover='window.alert (\"Message\")' foo>")
		<<"<hr foo>"
	<<html.neuterJavaScript ("<table><tr><td background=\"perlscript:deleteEverything\">Look out!!!</td></tr></table>")
		<<"<table><tr>&lt;td background=\"perlscript:deleteEverything\">Look out!!!</td></tr></table>"
	<<html.neuterJavaScript ("<hr onmouseover='window.alert (\"Mess\\'a>ge\")' foo>")
		<<"<hr foo>"
	<<html.neuterJavaScript ("<hr onmouseover=\"foo\">")
		<<"<hr>"
	<<html.neuterJavaScript ("<hr onmouseover='window.alert (\"Message\")'>")
		<<"<hr>"
	<<html.neuterJavaScript ("foo bar baz")
		<<"foo bar baz"
	<<html.neuterJavaScript ("<!-- Get your groove on, Baby! -->")
		<<"<!-- Get your groove on, Baby! -->"
	<<html.neuterJavaScript("<a title='foo on bar' onmouseover='foo on bar' onmouseout='eatme()'>baz</a>")
		<<"<a title='foo on bar'>baz</a>"
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.