Monday, November 08, 2010 at 12:05 AM.
system.verbs.builtins.radio.html.patchLinks
on patchLinks (s, staticUrl, dynamicUrl, uri) {
    <<Changes
        <<Code review (undated)
            <<Absolute links that point into the dynamic site are now actually patched. The old check compared a 15-character fragment, which still included the opening quote, against the whole dynamic URL, so it could never match a quoted link or a dynamic URL longer than 15 characters.
            <<The href attribute is now found case-insensitively, like the <a tag itself.
            <<For a page at the top level of the site (depth 0), the page's filename is no longer treated as a directory that can be popped off the link.
            <<Removed superseded commented-out code.
        <<10/31/01; 1:14:11 AM by JES
            <<Fixed a bug where links to pages below two or more levels deep from a page which was one level deep, would fail. Now all relative links work correctly.
        <<10/30/01; 3:00:43 PM by PBS
            <<Slight speed improvement by using string.patternMatch and eliminating more loops.
        <<10/28/01; 3:47:53 PM by JES
            <<Fixed a bug where only the first link in the page would be patched, because the current offset wasn't added to the offset found by string.patternMatch.
        <<10/26/01; 9:26:35 PM by PBS
            <<Instead of checking every third character, use string.patternMatch to find the beginning of links. Much faster.
        <<9/3/01; 12:41:56 AM by JES
            <<Created.
    <<Patch the link URLs in the htmltext -- s, so that they link to urls in the site at staticUrl.
        <<dynamicUrl is the url of the dynamic site.
        <<uri is the path to the current page.
    <<Params
        <<s -- the html text to scan; the patched text is returned.
        <<staticUrl -- url of the static site. It is normalized to end with "/" but is not otherwise used by the current code, because patched links are always relative.
        <<dynamicUrl -- url of the dynamic site. Absolute links are only patched if they begin with this url (compared in lower case).
        <<uri -- path of the page that contains s, relative to the top of the site, e.g. "/stories/foo/bar.html". A uri that ends with "/" is treated as a page inside that directory.
    <<Returns
        <<s, with the href of each patched <a tag replaced by a link relative to the current page.
    <<Links that are already relative to the current page (no leading "/") are left alone, as are absolute links, javascript:, ftp:, etc. that do not point into the dynamic site.

    if not (staticUrl endsWith "/") { //pop off the filename
        staticUrl = string.popSuffix (staticUrl, '/') + "/"};
    if not (dynamicUrl endsWith "/") { //pop off the filename
        dynamicUrl = string.popSuffix (dynamicUrl, '/') + "/"};
    local (lowerDynamicUrl = string.lower (dynamicUrl)); //PBS 10/30/01: avoid calling string.lower so much later

    if uri endsWith "/" { //add a dummy filename
        uri = uri + "x.html"};
    uri = string.popLeading (uri, '/');
    local (thisPageDepth = string.countFields (uri, '/') - 1); //number of directories above the current page

    on getPatchedUrl (url) { //for a url in the dynamic site, return a relative link in the static site
        if url contains ":" { //absolute link -- pop off the protocol and server parts
            local (urlparts = string.urlSplit (url));
            url = urlparts[3]}
        else { //relative link -- either to the current page, or the top of the site
            if url beginsWith "/" { //relative to top of site
                url = string.popLeading (url, '/')}
            else { //relative to the current page
                return (url)}};
        <<At this point we have the url, relative to the top of the site -- now we generate the patched URL
        bundle { //pop leading parts of the URI, if they match the path to the current page
            local (ixpart = 1);
            loop {
                if ixpart >= thisPageDepth { //never pop the last directory; for a top-level page (depth 0) there is nothing to pop at all
                    break};
                local (pathpart = string.nthField (uri, '/', ixpart));
                if sizeOf (pathpart) == 0 {
                    break};
                local (firstpart = string.nthField (url, '/', 1));
                if pathpart == firstpart {
                    url = string.delete (url, 1, sizeOf (pathpart) + 1)}
                else {
                    break};
                ixpart++};
            if ixpart <= thisPageDepth { //climb out of the directories that were not shared
                url = string.filledString ("../", thisPageDepth - ixpart + 1) + url}};
        return (url)};

    local (ct = sizeOf (s));
    local (ix = 0);
    loop { //loop through the text, find <a tags, patch hrefs
        local (pos = 0);
        pos = string.patternMatch ("<a ", string.lower (string.mid (s, ix, infinity))); //PBS 10/26/01: faster to use string.patternMatch
        if pos < 1 {
            break};
        ix = ix + pos + 2; //pos is relative to ix -- move past the "<a "
        bundle { //find the href attribute
            local (ixhref = 0);
            ixhref = string.patternMatch ("href=", string.lower (string.mid (s, ix, infinity))); //PBS 10/30/01: string.patternMatch is faster than looping; lower-cased so HREF= is found too
            if ixhref > 0 {
                ix = ix + ixhref + 4; //first character after "href="
                local (endCh = ' ');
                if s[ix] == "\"" or s[ix] == "\'" { //quoted value -- the URL ends at the matching quote
                    endCh = s[ix];
                    ix++};
                bundle { //find reasons not to patch the link
                    local (fragment = string.mid (s, ix, 15));
                    if fragment contains ":" { //absolute link, JavaScript, https, ftp, etc...
                        local (urlStart = string.lower (string.mid (s, ix, sizeOf (lowerDynamicUrl)))); //compare as many characters as the dynamic url has, starting after the quote
                        if not (urlStart beginsWith lowerDynamicUrl) { //not a link into the dynamic site -- leave it alone
                            ix++;
                            continue}}};
                local (ixstart = ix, ixend, endchars = {endCh, '\r', '\n', '\t'});
                loop { //find the end of the URL
                    if ix > ct {
                        break};
                    if endchars contains s[ix] {
                        ixend = ix;
                        break};
                    ix++};
                if ixend != nil { //patch the URL and replace it in the text
                    local (url = string.mid (s, ixstart, ixend - ixstart));
                    local (patchedUrl = getPatchedUrl (url)); //get the patched URL
                    bundle { //replace the url, in place
                        s = string.delete (s, ixstart, ixend - ixstart);
                        s = string.insert (patchedUrl, s, ixstart);
                        ix = ixstart + sizeOf (patchedUrl);
                        ct = sizeOf (s)}; //the text changed length, so refresh the cached size
                    bundle { //skip to the end of the tag
                        local (ixendtag = 0);
                        ixendtag = string.patternMatch (">", string.mid (s, ix, infinity));
                        if ixendtag < 1 {
                            break};
                        ix = ix + ixendtag}}}
            else { //skip to end of tag
                local (ixendtag = 0);
                ixendtag = string.patternMatch (">", string.mid (s, ix, infinity));
                if ixendtag < 1 {
                    break};
                ix = ix + ixendtag};
            ix++};
        ix++;
        if ix + 3 >= ct { //we're done
            break}};
    return (s)}

<<bundle //testing
    <<bundle //links to deeper pages
        <<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/index.html")
            <<"This is a <a href=\"2001/08/03.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/")
            <<"This is a <a href=\"2001/08/03.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/foo/bar.html")
            <<"This is a <a href=\"../2001/08/03.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/stories/foo/bar.html")
            <<"This is a <a href=\"../../2001/08/03.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/categories/bar.html")
            <<"This is a <a href=\"../2001/08/03.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/categories/")
            <<"This is a <a href=\"../2001/08/03.html\">test</a>."
    <<bundle //links to shallower pages
        <<radio.html.patchLinks ("This is a <a href=\"/index.html\">test</a>.", "", "", "/foo/bar/baz/boo.html")
            <<"This is a <a href=\"../../../index.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/foo/index.html\">test</a>.", "", "", "/foo/bar/baz/boo.html")
            <<"This is a <a href=\"../../index.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/index.html\">test</a>.", "", "", "/foo/bar/baz/")
            <<"This is a <a href=\"../../../index.html\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/\">test</a>.", "", "", "/foo/bar/baz/boo.html")
            <<"This is a <a href=\"../../../\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/stories/\">test</a>.", "", "", "/stories/foo/bar.html")
            <<"This is a <a href=\"../\">test</a>."
        <<radio.html.patchLinks ("This is a <a href=\"/stories/index.html\">test</a>.", "", "", "/blat/foo/bar.html")
            <<"This is a <a href=\"../../stories/index.html\">test</a>."
    <<bundle //links into the dynamic site, and upper-case tags
        <<radio.html.patchLinks ("See <a href=\"http://127.0.0.1:5335/stories/foo.html\">foo</a>.", "", "http://127.0.0.1:5335/", "/index.html")
            <<"See <a href=\"stories/foo.html\">foo</a>."
        <<radio.html.patchLinks ("<A HREF=\"/a.html\">x</A>", "", "", "/foo/bar.html")
            <<"<A HREF=\"../a.html\">x</A>"
<<bundle //more testing
    <<local (s = string (workspace.htmltext))
    <<local (dynamicUrl = "http://63.200.130.124:8081/")
    <<local (uri = "/index.html")
    <<wp.newTextObject (patchLinks (s, "", dynamicUrl, uri), @workspace.patchedhtmltext)
    <<edit (@workspace.patchedhtmltext)
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.