Monday, November 08, 2010 at 12:05 AM.

system.verbs.builtins.radio.html.patchLinks

on patchLinks (s, staticUrl, dynamicUrl, uri) {
	<<Changes
		<<10/31/01; 1:14:11 AM by JES
			<<Fixed a bug where links to pages below two or more levels deep from a page which was one level deep, would fail. Now all relative links work correctly.
		<<10/30/01; 3:00:43 PM by PBS
			<<Slight speed improvement by using string.patternMatch and eliminating more loops.
		<<10/28/01; 3:47:53 PM by JES
			<<Fixed a bug where only the first link in the page would be patched, because the current offset wasn't added to the offset found by string.patternMatch.
		<<10/26/01; 9:26:35 PM by PBS
			<<Instead of checking every third character, use string.patternMatch to find the beginning of links. Much faster.
		<<9/3/01; 12:41:56 AM by JES
			<<Created.
			<<Patch the link URLs in the htmltext -- s, so that they link to urls in the site at staticUrl.
			<<dynamicUrl is the url of the dynamic site.
			<<uri is the path to the current page.
	<<Notes
		<<Returns s with the href value of each <a ...> tag rewritten as a link relative to the page at uri.
		<<An href is rewritten when it starts with "/" (relative to the top of the site), or when it is an absolute URL that starts with dynamicUrl.
		<<An href is left untouched when it has a ":" in its first 15 characters and does not start with dynamicUrl (other hosts, mailto:, javascript:, etc), or when it is relative to the current page.
		<<staticUrl is normalized below but is not otherwise used by this script.
		<<Review fixes
			<<The dynamicUrl comparison now happens after the opening quote has been skipped, and compares as many characters as dynamicUrl is long. Before, the quote was part of the compared text and only 15 characters were compared, so quoted links (and links to any dynamicUrl longer than 15 characters) were never patched.
			<<href= is now matched without regard to case, the same way the <a search already was.
			<<Removed the commented-out copies of earlier implementations; the change notes above describe what they did.
	
	if not (staticUrl endsWith "/") { //pop off the filename
		staticUrl = string.popSuffix (staticUrl, '/') + "/"};
	if not (dynamicUrl endsWith "/") { //pop off the filename
		dynamicUrl = string.popSuffix (dynamicUrl, '/') + "/"};
	local (lowerDynamicUrl = string.lower (dynamicUrl)); //PBS 10/30/01: avoid calling string.lower so much later
	if uri endsWith "/" { //add a dummy filename
		uri = uri + "x.html"};
	uri = string.popLeading (uri, '/');
	
	local (thisPageDepth = string.countFields (uri, '/') - 1); //number of "/"-separated fields in uri, not counting the filename
	
	on getPatchedUrl (url) { //for a url in the dynamic site, return a relative link in the static site
		if url contains ":" { //absolute link -- pop off the protocol and server parts
			local (urlparts = string.urlSplit (url));
			url = urlparts[3]}
		else { //relative link -- either to the current page, or the top of the site
			if url beginsWith "/" { //relative to top of site
				url = string.popLeading (url, '/')}
			else { //relative to the current page -- leave it alone
				return (url)}};
		
		<<At this point we have the url, relative to the top of the site -- now we generate the patched URL
		bundle { //pop leading parts of the URI, if they match the path to the current page
			local (ixpart = 1);
			loop {
				if ixpart == thisPageDepth {
					break};
				local (pathpart = string.nthField (uri, '/', ixpart));
				if sizeOf (pathpart) == 0 {
					break};
				local (firstpart = string.nthField (url, '/', 1));
				if pathpart == firstpart {
					url = string.delete (url, 1, sizeOf (pathpart) + 1)}
				else {
					break};
				ixpart++};
			if ixpart <= thisPageDepth { //climb out of the folders that the page and the target do not share
				url = string.filledString ("../", thisPageDepth - ixpart + 1) + url}};
		
		return (url)};
	
	local (ct = sizeOf (s));
	local (ix = 0);
	loop { //loop through the text, find <a tags, patch hrefs
		local (pos = 0);
		pos = string.patternMatch ("<a ", string.lower (string.mid (s, ix, infinity))); //PBS 10/26/01: faster to use string.patternMatch
		if pos < 1 {
			break};
		ix = ix + pos + 2; //pos is relative to ix -- move past the start of the tag
		bundle { //find the href attribute
			local (ixhref = 0);
			ixhref = string.patternMatch ("href=", string.lower (string.mid (s, ix, infinity))); //PBS 10/30/01: string.patternMatch is faster than looping. Lower-cased so that HREF= is found too.
			if ixhref > 0 {
				ix = ix + ixhref + 4; //first character after href=
				local (endCh = ' ');
				if s[ix] == "\"" or s[ix] == "\'" { //quoted value -- it ends at the matching quote
					endCh = s[ix];
					ix++};
				bundle { //find reasons not to patch the link
					local (fragment = string.mid (s, ix, 15));
					if fragment contains ":" { //absolute link, JavaScript, https, ftp, etc...
						local (urlstart = string.lower (string.mid (s, ix, sizeOf (lowerDynamicUrl)))); //compare the whole of dynamicUrl, not just the first 15 characters
						if not (urlstart beginsWith lowerDynamicUrl) {
							ix++;
							continue}}};
				local (ixstart = ix, ixend, endchars = {endCh, '\r', '\n', '\t'});
				loop { //find the end of the URL
					if ix > ct {
						break};
					if endchars contains s[ix] {
						ixend = ix;
						break};
					ix++};
				if ixend != nil { //patch the URL and replace it in the text
					local (url = string.mid (s, ixstart, ixend - ixstart));
					local (patchedUrl = getPatchedUrl (url)); //get the patched URL
					bundle { //replace the url, in place
						s = string.delete (s, ixstart, ixend - ixstart);
						s = string.insert (patchedUrl, s, ixstart);
						ix = ixstart + sizeOf (patchedUrl);
						ct = sizeOf (s)}; //the text may have changed length
					bundle { //skip to the end of the tag
						local (ixendtag = 0);
						ixendtag = string.patternMatch (">", string.mid (s, ix, infinity));
						if ixendtag < 1 { //no closing > anywhere after this point -- stop scanning
							break};
						ix = ix + ixendtag}}}
			else { //skip to end of tag
				local (ixendtag = 0);
				ixendtag = string.patternMatch (">", string.mid (s, ix, infinity));
				if ixendtag < 1 {
					break};
				ix = ix + ixendtag};
			ix++};
		ix++;
		if ix + 3 >= ct { //we're done
			break}};
	return (s)}
<<bundle //testing
	<<bundle //links to deeper pages
		<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/index.html")
			<<"This is a <a href=\"2001/08/03.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/")
			<<"This is a <a href=\"2001/08/03.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/foo/bar.html")
			<<"This is a <a href=\"../2001/08/03.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/stories/foo/bar.html")
			<<"This is a <a href=\"../../2001/08/03.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/categories/bar.html")
			<<"This is a <a href=\"../2001/08/03.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/2001/08/03.html\">test</a>.", "", "", "/categories/")
			<<"This is a <a href=\"../2001/08/03.html\">test</a>."
	<<bundle //links to shallower pages
		<<radio.html.patchLinks ("This is a <a href=\"/index.html\">test</a>.", "", "", "/foo/bar/baz/boo.html")
			<<"This is a <a href=\"../../../index.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/foo/index.html\">test</a>.", "", "", "/foo/bar/baz/boo.html")
			<<"This is a <a href=\"../../index.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/index.html\">test</a>.", "", "", "/foo/bar/baz/")
			<<"This is a <a href=\"../../../index.html\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/\">test</a>.", "", "", "/foo/bar/baz/boo.html")
			<<"This is a <a href=\"../../../\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/stories/\">test</a>.", "", "", "/stories/foo/bar.html")
			<<"This is a <a href=\"../\">test</a>."
		<<radio.html.patchLinks ("This is a <a href=\"/stories/index.html\">test</a>.", "", "", "/blat/foo/bar.html")
			<<"This is a <a href=\"../../stories/index.html\">test</a>."
<<bundle //more testing
	<<local (s = string (workspace.htmltext))
	<<local (dynamicUrl = "http://63.200.130.124:8081/")
	<<local (uri = "/index.html")
	<<wp.newTextObject (patchLinks (s, "", dynamicUrl, uri), @workspace.patchedhtmltext)
	<<edit (@workspace.patchedhtmltext)



This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.