Monday, November 08, 2010 at 12:03 AM.
system.verbs.builtins.html.neuterJavaScript
on neuterJavaScript (s, legalProtocolSchemes=nil) {
	<<Neuter JavaScript in a string; neuter <script> tags, event handlers, and javascript: (and similar) protocols.
	<<Params:
		<<s -- the HTML text to filter.
		<<legalProtocolSchemes -- optional list of permitted protocol scheme names (lower case, without the colon). If nil, the default list below is used; if supplied, it replaces the default list entirely.
	<<Returns the filtered string:
		<<<script> and </script> tags have their opening < replaced with the entity &lt;.
		<<on-event attributes (whitespace, "on", a letter ... up to the end of the attribute value) are deleted from the tag.
		<<A tag with a URL-bearing attribute (action, background, code, codebase, data, datasrc, href, lowsrc, src) whose protocol scheme is not in legalProtocolSchemes has its opening < replaced with &lt;.
	<<Changes:
		<<2/10/02; 11:51:27 PM by JES
			<<Fixed a bug where HTML attribute values which contained the string "on" would cause incorrect neutering of the text. Fixed a bug where if there were more than one on-event attribute, only the first would be neutered.
		<<12/28/01; 4:20:30 AM by JES
			<<Fixed a bug where HTML comments which contained the string "on" would not be closed properly, resulting in incorrectly formatted HTML.
		<<2/23/01; 7:40:23 PM by JES
			<<Fixed a bug where an un-closed tag would cause a script error, array index out of range.
		<<2/20/01; 4:52:06 PM by PBS
			<<Optimization: fast return if the string does not contain a < character.
		<<2/20/01; 4:33:17 PM by JES
			<<Fixed a bug where quote characters inside event handlers could potentially allow arbitrary text to pass through this filter.
		<<2/20/01; 12:23:53 PM by JES
			<<Instead of neutering event handlers by inserting an 'x' character, remove event handlers completely.
			<<Neuter illegal protocol schemes in all applicable attributes, not just href's.
			<<Instead of neutering attributes when the protocol scheme is illegal, neuter the entire tag by replacing the opening < with &lt;.
		<<2/19/01; 5:26:22 PM by PBS
			<<Removed flNeuterEventHandlers and flNeuterScriptTags parameters.
			<<Made legalProtocolSchemes an optional parameter. If not supplied, use the default list. The caller can either not supply it or over-ride it entirely.
		<<2/19/01; 2:29:55 PM by JES
			<<Neuter all illegal protocol schemes from <a href> tags.
		<<2/17/01; 1:24:12 AM by JES
			<<A whitespace character may be a space, a carriage return, or a linefeed character -- not just a space. Neuter <a href's which are javascript: protocol links, i.e. <a href="javascript:...>...</a>
		<<2/16/01; 9:06:43 PM by PBS
			<<Created.
	if not (s contains "<") { //fast return if no < character
		return (s)};
	local (ix = 1); //index of the character currently being examined in s
	local (whitespaceChars = {' ', '\t', '\r', '\n'});
	if legalProtocolSchemes == nil { //if nil, use default list
		legalProtocolSchemes = {"http", "https", "ftp", "mailto", "file", "gopher", "news", "nntp", "telnet", "wais", "prospero", "nfs", "afs", "at", "binhex", "binhex4", "finger", "help", "about", "mac-binary", "mac-binhex", "mac-binhex40", "macbinary", "netphone", "nsl_neighborhood", "octet-stream", "pnm", "rtsp", "uue", "whois", "x-binary", "x-binhex40", "x-compress", "x-mac-binhex40", "x-macbinary", "x-netphone", "x-nsl_neighborhood", "x-sit", "x-stuffix", "x-tar", "zip"}};
	while ix <= sizeof (s) { //walk the string; every < that does not open an HTML comment starts a tag scan
		if s[ix] == '<' and (string.mid (s, ix, 4) != "<!--") {
			local (i, ct);
			local (flinquotes = false, flinsinglequotes = false, flindoublequotes = false);
			for i = ix to sizeof (s) { //scan forward from the < looking for the end of the tag
				if ((s [i] == '>') and (not flinquotes)) or (i == sizeof (s)) { //end of tag, or end of string for an un-closed tag
					bundle { //neuter script tags
						local (tagname = string.mid (s, ix + 1, i - ix - 1)); //everything between the < and the >
						if tagname beginswith "/" { //closing tags are treated the same as opening tags
							tagname = string.delete (tagname, 1, 1)};
						tagname = string.trimWhiteSpace (tagname);
						if tagname contains ' ' { //ignore any attributes
							tagname = string.nthfield (tagname, ' ', 1)};
						if string.lower (tagname) == "script" {
							s = string.delete (s, ix, 1); //delete the <
							s = string.insert ("&lt;", s, ix)}; //replace with the &lt; entity, so the tag is displayed as text instead of being parsed
						break}};
				<<Track whether position i is inside a quoted attribute value, so that a > inside quotes does not end the tag.
				if s [i] == '\'' {
					flinsinglequotes = not flinsinglequotes};
				if s [i] == '\"' {
					flindoublequotes = not flindoublequotes};
				if flinsinglequotes or flindoublequotes {
					flinquotes = true}
				else {
					flinquotes = false};
				ct = sizeOf (s);
				bundle { //neuter event handlers
					if i < ct - 3 {
						if whitespaceChars contains s[i] { //02/17/01 JES: whitespace can be \t, \r or \n
							if string.lower (string.mid (s, i +1, 2)) == "on" { //an event handler?
								if string.isAlpha (string.mid (s, i + 3, 1)) { //"on" must be followed by a letter
									local (eventend = i + 1);
									while eventend <= sizeOf (s) { //find the '=' character
										if s[eventend] == '=' {
											break};
										eventend++};
									eventend++;
									while eventend <= sizeOf (s) { //find the first non-whitespace character after the '='
										if not (whitespaceChars contains s[eventend]) {
											break};
										eventend++};
									local (quotechar); //stays nil when the attribute value is not quoted
									if eventend <= sizeOf (s) {
										if (s[eventend] == '\'') or (s[eventend] == '\"') {
											quotechar = s[eventend];
											eventend++}};
									while eventend <= sizeOf (s) { //find the end of the attribute value
										if quotechar != nil {
											if (s[eventend] == quotechar) {
												if (s[eventend - 1] != '\\') { //a backslash-escaped quote does not end the value
													eventend++;
													break}}}
										else {
											if (whitespaceChars contains s[eventend]) or (s[eventend] == '>') {
												break}};
										eventend++};
									s = string.delete (s, i, eventend - i); //remove the leading whitespace, the attribute name and its value
									ix--; //cancels the ix++ below, so the same tag is scanned again from its < and any further handlers are found
									break}}}}};
				if i < ct - 5 { //02/19/01 JES: neuter illegal protocol schemes in all applicable attributes
					if whitespaceChars contains s[i] {
						local (checkString = string.lower (string.mid (s, i + 1, 10))); //the 10 characters following the whitespace; each attribute name below is looked for within them
						case true {
							string.patternMatch ("action", checkString) > 0;
							string.patternMatch ("background", checkString) > 0;
							string.patternMatch ("code", checkString) > 0;
							string.patternMatch ("codebase", checkString) > 0;
							string.patternMatch ("data", checkString) > 0;
							string.patternMatch ("datasrc", checkString) > 0;
							string.patternMatch ("href", checkString) > 0;
							string.patternMatch ("lowsrc", checkString) > 0;
							string.patternMatch ("src", checkString) > 0 {
								local (schemestart = i + 1);
								while schemestart <= sizeOf (s) { //find the '=' character
									if s[schemestart] == '=' {
										break};
									schemestart++};
								schemestart++;
								local (foundquote = false);
								while schemestart <= sizeOf (s) { //find the protocol scheme start
									if string.isAlpha (s[schemestart]) {
										break};
									case s[schemestart] {
										'_';
										'-' {
											break}};
									schemestart++};
								<<NOTE(review): flFoundScheme starts out true and is only cleared by a /, " or ' terminator. If the scan below reaches the end of s without meeting a colon or a terminator, the tag is still neutered. This errs on the safe side; confirm it is intended.
								local (schemeend = schemestart, flFoundScheme = true);
								while schemeend <= sizeOf (s) {
									if s[schemeend] == ':' { //the protocol scheme ends with a colon
										break};
									case s[schemeend] { //other characters which would end the href attribute
										'/';
										'\"';
										'\'' {
											flFoundScheme = false;
											break}};
									schemeend++};
								if flFoundScheme {
									local (scheme = string.lower (string.mid (s, schemestart, schemeend - schemestart)));
									if not (legalProtocolSchemes contains scheme) {
										s = string.delete (s, ix, 1); //delete the <
										s = string.insert ("&lt;", s, ix); //replace with the &lt; entity, neutering the whole tag
										break}}}}}}}};
		ix++};
	return (s)}
<<bundle //test code
<<html.neuterJavaScript ("<script type=\"text\">Foo</script>")
<<"&lt;script type=\"text\">Foo&lt;/script>"
<<html.neuterJavaScript ("\"<script type=\"text\">Foo</script>")
<<"\"&lt;script type=\"text\">Foo&lt;/script>"
<<html.neuterJavaScript ("< script type=\"text\">Foo</ script>")
<<"&lt; script type=\"text\">Foo&lt;/ script>"
<<html.neuterJavaScript ("Foobar \r\n< script type=\"text\">Foo</ script>")
<<"Foobar \r\n&lt; script type=\"text\">Foo&lt;/ script>"
<<html.neuterJavaScript ("<hr> <a href=\"http://foo.com/\" onmouseover='bar'>Foobar</a><p>\r\n< script type=\"text\">Foo</ script>")
<<"<hr> <a href=\"http://foo.com/\">Foobar</a><p>\r\n&lt; script type=\"text\">Foo&lt;/ script>"
<<html.neuterJavaScript ("<a onmouseout='foo'\">Bar</a>")
<<"<a\">Bar</a>"
<<html.neuterJavaScript ("<a onmouseout='foo>' onmouseover='bar'>Bar</a>")
<<"<a>Bar</a>"
<<html.neuterJavaScript ("<a onmouseout='foo'> onmouseover='bar' Bar</a>")
<<"<a> onmouseover='bar' Bar</a>"
<<html.neuterJavaScript ("<a href='onmouseover.html'> onmouseover='bar' Bar</a>")
<<"<a href='onmouseover.html'> onmouseover='bar' Bar</a>"
<<html.neuterJavaScript ("<a href=\"javascript:foo\">Bar</a>")
<<"&lt;a href=\"javascript:foo\">Bar</a>"
<<html.neuterJavaScript ("<img src=\"javascript:foo\">")
<<"&lt;img src=\"javascript:foo\">"
<<html.neuterJavaScript ("<a href=\"perlscript:foo\">Bar</a>")
<<"&lt;a href=\"perlscript:foo\">Bar</a>"
<<html.neuterJavaScript ("<a href=\" _some-other_script : foo \">Bar</a>")
<<"&lt;a href=\" _some-other_script : foo \">Bar</a>"
<<html.neuterJavaScript ("<a href=\" _some-other_script : foo \" style='bar:baz'>Bar</a>")
<<"&lt;a href=\" _some-other_script : foo \" style='bar:baz'>Bar</a>"
<<html.neuterJavaScript ("<a href=\"http://foo\">Bar</a>")
<<"<a href=\"http://foo\">Bar</a>"
<<html.neuterJavaScript ("<a href=\"/foo\">Bar</a>")
<<"<a href=\"/foo\">Bar</a>"
<<html.neuterJavaScript ("<a href=\"../foo\">Bar</a>")
<<"<a href=\"../foo\">Bar</a>"
<<html.neuterJavaScript ("<a href=\"foo\">Bar</a>")
<<"<a href=\"foo\">Bar</a>"
<<html.neuterJavaScript ("<a href='foo' style=baz:bam>Bar</a>")
<<"<a href='foo' style=baz:bam>Bar</a>"
<<html.neuterJavaScript ("<a href=\"foo\'\" onmouseover=\'foo\'>Bar</a>")
<<"<a href=\"foo'\">Bar</a>"
<<html.neuterJavaScript ("<a href=\"http://foo\'\" onmouseover=\'foo\'>Bar</a>", {"ftp", "mailto"})
<<"&lt;a href=\"http://foo'\" onmouseover='foo'>Bar</a>"
<<html.neuterJavaScript ("<hr onmouseover='window.alert (\"Message\")' foo>")
<<"<hr foo>"
<<html.neuterJavaScript ("<table><tr><td background=\"perlscript:deleteEverything\">Look out!!!</td></tr></table>")
<<"<table><tr>&lt;td background=\"perlscript:deleteEverything\">Look out!!!</td></tr></table>"
<<html.neuterJavaScript ("<hr onmouseover='window.alert (\"Mess\\'a>ge\")' foo>")
<<"<hr foo>"
<<html.neuterJavaScript ("<hr onmouseover=\"foo\">")
<<"<hr>"
<<html.neuterJavaScript ("<hr onmouseover='window.alert (\"Message\")'>")
<<"<hr>"
<<html.neuterJavaScript ("foo bar baz")
<<"foo bar baz"
<<html.neuterJavaScript ("<!-- Get your groove on, Baby! -->")
<<"<!-- Get your groove on, Baby! -->"
<<html.neuterJavaScript("<a title='foo on bar' onmouseover='foo on bar' onmouseout='eatme()'>baz</a>")
<<"<a title='foo on bar'>baz</a>"
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index it, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.