Monday, November 08, 2010 at 12:05 AM.
system.verbs.builtins.radio.html.patchLinks
on patchLinks (s, staticUrl, dynamicUrl, uri) {
	<<Changes
		<<10/31/01; 1:14:11 AM by JES
			<<Fixed a bug where links to pages below two or more levels deep from a page which was one level deep, would fail. Now all relative links work correctly.
		<<10/30/01; 3:00:43 PM by PBS
			<<Slight speed improvement by using string.patternMatch and eliminating more loops.
		<<10/28/01; 3:47:53 PM by JES
			<<Fixed a bug where only the first link in the page would be patched, because the current offset wasn't added to the offset found by string.patternMatch.
		<<10/26/01; 9:26:35 PM by PBS
			<<Instead of checking every third character, use string.patternMatch to find the beginning of links. Much faster.
		<<9/3/01; 12:41:56 AM by JES
			<<Created.
		<<Review fixes (undated)
			<<The href= search is now case-insensitive, like the <a search, and is limited to the current tag.
			<<The test for "absolute link that is not in the dynamic site" now looks at the whole URL, after the opening quote has been skipped, instead of at a 15-character fragment that still included the quote.
			<<An unquoted href value now also ends at the > that closes the tag.
			<<The scan starts at character 1, no longer reads s[ix] past the end of the text, and no longer steps over the two characters that follow each tag.
	<<Patch the link URLs in the htmltext -- s, so that they link to urls in the site at staticUrl.
		<<dynamicUrl is the url of the dynamic site.
		<<uri is the path to the current page.
		<<Returns s with the patched hrefs. Only <a tags are examined.
		<<Links that are relative to the top of the site (they begin with /) and absolute links that begin with dynamicUrl are rewritten as links relative to the current page.
		<<Links relative to the current page, and links containing a colon that do not begin with dynamicUrl (other servers, mailto, JavaScript, ftp...), are left alone.
	if not (staticUrl endsWith "/") { //pop off the filename
		staticUrl = string.popSuffix (staticUrl, '/') + "/"}; //NOTE(review): staticUrl is not read again in this script
	if not (dynamicUrl endsWith "/") { //pop off the filename
		dynamicUrl = string.popSuffix (dynamicUrl, '/') + "/"};
	local (lowerDynamicUrl = string.lower (dynamicUrl)); //PBS 10/30/01: avoid calling string.lower so much later
	if uri endsWith "/" { //add a dummy filename
		uri = uri + "x.html"};
	uri = string.popLeading (uri, '/');
	local (thisPageDepth = string.countFields (uri, '/') - 1); //number of folders between the top of the site and the current page
	on getPatchedUrl (url) { //for a url in the dynamic site, return a relative link in the static site
		<<A url that is relative to the current page is returned unchanged -- that includes urls that start with ../
		if url contains ":" { //absolute link -- pop off the protocol and server parts
			local (urlparts = string.urlSplit (url));
			url = urlparts[3]} //presumably the path, without a leading slash -- confirm against string.urlSplit
		else { //relative link -- either to the current page, or the top of the site
			if url beginsWith "/" { //relative to top of site
				url = string.popLeading (url, '/')}
			else { //relative to the current page
				return (url)}};
		<<At this point we have the url, relative to the top of the site -- now we generate the patched URL
		bundle { //pop leading parts of the URI, if they match the path to the current page
			local (ixpart = 1);
			loop {
				if ixpart == thisPageDepth { //the last folder is never popped; the ../ added below climbs out of it instead
					break};
				local (pathpart = string.nthField (uri, '/', ixpart));
				if sizeOf (pathpart) == 0 { //ran out of path parts
					break};
				local (firstpart = string.nthField (url, '/', 1));
				if pathpart == firstpart {
					url = string.delete (url, 1, sizeOf (pathpart) + 1)} //delete the folder name and the slash after it
				else {
					break};
				ixpart++};
			if ixpart <= thisPageDepth { //and thisPageDepth > 1
				url = string.filledString ("../", thisPageDepth - ixpart + 1) + url}}; //one ../ for each folder of the current page that did not match
		return (url)};
	local (ct = sizeOf (s));
	local (ix = 1); //index in s of the next character to examine
	loop { //loop through the text, find <a tags, patch hrefs
		if ix + 3 >= ct { //we're done -- too little text left to hold another link
			break};
		local (lowerRest = string.lower (string.mid (s, ix, infinity))); //lowercased once per tag, shared by all the searches below
		local (pos = string.patternMatch ("<a ", lowerRest)); //PBS 10/26/01: faster to use string.patternMatch
		if pos < 1 {
			break};
		ix = ix + pos + 2; //the < is at ix + pos - 1, so this is the first character after "<a "
		local (lowerTag = string.mid (lowerRest, pos + 3, infinity)); //the lowercased text from ix onward
		local (ixhref = string.patternMatch ("href=", lowerTag)); //PBS 10/30/01: string.patternMatch is faster than looping
		local (ixclose = string.patternMatch (">", lowerTag)); //the first > after "<a " -- taken to be the end of this tag
		if ixhref > 0 and (ixclose < 1 or ixhref < ixclose) { //this tag has an href attribute
			ix = ix + ixhref + 4; //first character after "href="
			if ix > ct { //the text ends right after "href="
				break};
			local (endchars = {' ', '>', '\r', '\n', '\t'}); //an unquoted value ends at whitespace or at the end of the tag
			if s[ix] == "\"" or s[ix] == "\'" { //a quoted value ends at the matching quote
				endchars = {s[ix], '\r', '\n', '\t'};
				ix++};
			local (ixstart = ix, ixend = 0); //ixend stays 0 if the URL is never terminated
			loop { //find the end of the URL
				if ix > ct {
					break};
				if endchars contains s[ix] {
					ixend = ix;
					break};
				ix++};
			if ixend > 0 { //patch the URL and replace it in the text
				local (url = string.mid (s, ixstart, ixend - ixstart));
				local (flPatch = true);
				if url contains ":" { //absolute link, JavaScript, https, ftp, etc...
					if not (string.lower (url) beginsWith lowerDynamicUrl) { //only absolute links into the dynamic site get patched
						flPatch = false}};
				if flPatch {
					local (patchedUrl = getPatchedUrl (url)); //get the patched URL
					s = string.delete (s, ixstart, ixend - ixstart); //replace the url, in place
					s = string.insert (patchedUrl, s, ixstart);
					ix = ixstart + sizeOf (patchedUrl); //the character that ended the URL
					ct = sizeOf (s)}};
			bundle { //skip to the end of the tag
				local (ixendtag = string.patternMatch (">", string.mid (s, ix, infinity)));
				if ixendtag < 1 { //unterminated tag -- nothing more to patch
					break};
				ix = ix + ixendtag}} //first character after the >
		else { //no href in this tag -- skip to end of tag
			if ixclose < 1 {
				break};
			ix = ix + ixclose}}; //first character after the >
	return (s)}
<<bundle //testing
<<bundle //links to deeper pages
<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/index.html")
<<"This is a <a href=\"2001/08/03.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/")
<<"This is a <a href=\"2001/08/03.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/foo/bar.html")
<<"This is a <a href=\"../2001/08/03.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/stories/foo/bar.html")
<<"This is a <a href=\"../../2001/08/03.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/categories/bar.html")
<<"This is a <a href=\"../2001/08/03.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/categories/")
<<"This is a <a href=\"../2001/08/03.html\">test</a>."
<<bundle //links to shallower pages
<<radio.html.patchLinks ("This is a <a href=\"/index.html\">test</a>.", "", "", "/foo/bar/baz/boo.html")
<<"This is a <a href=\"../../../index.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/foo/index.html\">test</a>.", "", "", "/foo/bar/baz/boo.html")
<<"This is a <a href=\"../../index.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/index.html\">test</a>.", "", "", "/foo/bar/baz/")
<<"This is a <a href=\"../../../index.html\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/\">test</a>.", "", "", "/foo/bar/baz/boo.html")
<<"This is a <a href=\"../../../\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/stories/\">test</a>.", "", "", "/stories/foo/bar.html")
<<"This is a <a href=\"../\">test</a>."
<<radio.html.patchLinks ("This is a <a href=\"/stories/index.html\">test</a>.", "", "", "/blat/foo/bar.html")
<<"This is a <a href=\"../../stories/index.html\">test</a>."
<<bundle //more testing
<<local (s = string (workspace.htmltext))
<<local (dynamicUrl = "http://63.200.130.124:8081/")
<<local (uri = "/index.html")
<<wp.newTextObject (patchLinks (s, "", dynamicUrl, uri), @workspace.patchedhtmltext)
<<edit (@workspace.patchedhtmltext)
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.