Wednesday, December 01, 2010 at 4:37 AM.

system.verbs.builtins.xml.sitemapgenerate

on sitemapGenerate (sitefolder, baseurl, flsimple=false) {
	<<Changes
		<<11/29/10; 6:34:10 PM by DW
			<<Optional boolean, flsimple, default false. If true, we generate a single sitemap.xml file, no siteindex, and don't split the sitemaps into small files. The call is saying that it's a small site, and you can make it work with a simple sitemap. 
		<<11/28/10; 4:10:17 PM by DW
			<<Lots of changes, optimizations. Runs a lot faster.
		<<11/28/10; 11:46:39 AM by DW
			<<Took the date/time out of the advert at the top of each file. It would cause them to change every time, defeating the whole lastmod thing.
		<<11/28/10; 8:40:04 AM by DW
			<<Simple sitemap generator. Each second-level folder gets a sitemap. The top-level folder gets a sitemap of the files it contains, and the top-level folder gets a sitemapindex that points to all the sitemaps.
	<<Review fixes
		<<The per-sitemap file counter is now bumped after the rollover check. Before, the file that triggered a split was never counted, so follow-on sitemaps could hold one file too many, and a lone trailing file was never written.
		<<The "no files" cleanup now deletes sitemap.xml (it was looking for "sitemaps.xml", a name this script never writes).
		<<ctfiles starts at 0 instead of being left uninitialized.
		<<Top-level folders whose names begin with "." are skipped, the same as dofolder does for nested ones.
		<<sitefolder and baseurl get a trailing separator if the caller left it off.
	<<Params
		<<sitefolder -- path to the folder holding the static site.
		<<baseurl -- the url that sitefolder is served at.
		<<flsimple -- if true, one sitemap for the whole tree and no sitemapindex; if false, one sitemap per top-level folder plus a sitemapindex.xml in sitefolder.
	local (mapurls, pc = file.getpathchar (), f, maxfilesinsitemap, sitemapindexfname = "sitemapindex.xml");
	new (tabletype, @mapurls); //maps sitemap url -> sitemap file path, read when the index is generated
	bundle { //all paths and urls below are built by plain concatenation, so both must end with a separator
		if not (sitefolder endswith pc) {
			sitefolder = sitefolder + pc};
		if not (baseurl endswith "/") {
			baseurl = baseurl + "/"}};
	on writefile (f, s) {
		<<Write s to f, but only if the file is missing or its contents differ, so the mod date of an unchanged sitemap is left alone.
		file.surefilepath (f);
		if file.exists (f) {
			if string (file.readwholefile (f)) != s {
				file.writewholefile (f, s)}}
		else {
			file.writewholefile (f, s)}};
	on lastmod (f) {
		<<The file's modification date, formatted by file.getdatepath with "-" as the separator.
		return (file.getdatepath ("-", file.modified (f), false))};
	on advert () {
		<<The comment placed near the top of every generated file. No date/time in it, see the 11/28/10 change note.
		return ("\r<!-- Generated by \"" + (this - "system.verbs.builtins.") + "\" running in the OPML Editor. -->\r")};
	on encode (s) {
		return (xml.entityencode (s, true))};
	on addmaptofolder (basefolder, flOneLevelOnly, baseurl) {
		<<Generate the sitemap(s) for basefolder, which is served at baseurl.
			<<If flOneLevelOnly is true, sub-folders are not visited.
			<<Each sitemap written is recorded in mapurls.
		local (ctsitemaps = 0, ctfilesinsitemap = 0);
		local (xmltext, indentlevel, ctfiles = 0);
		on add (s) {
			xmltext = xmltext + string.filledstring ("\t", indentlevel) + s + "\r";};
		on startxml () {
			<<Begin a fresh sitemap document, resets the text, the indent and the per-sitemap counter.
			xmltext = "";
			indentlevel = 0;
			ctfilesinsitemap = 0;
			add ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			add (advert ());
			add ("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); indentlevel++};
		on endxml () {
			<<The closing tag goes out before the indent level drops. Left that way so files generated earlier stay byte-identical and don't get rewritten.
			add ("</urlset>"); indentlevel--};
		on writesitemap (flmorethanone) {
			<<Close the current document and write it out.
				<<If flmorethanone is true the file gets a numbered name (sitemap01.xml, sitemap02.xml, ...) and a plain sitemap.xml in the folder is deleted.
			local (f, fname);
			if flmorethanone {
				fname = "sitemap" + string.padwithzeros (++ctsitemaps, 2) + ".xml";
				bundle { //delete sitemap.xml in this folder, if it exists
					local (f = basefolder + "sitemap.xml");
					if file.exists (f) {
						file.delete (f)}}}
			else {
				fname = "sitemap.xml"};
			f = basefolder + fname;
			endxml ();
			writefile (f, xmltext);
			mapurls.[baseurl + fname] = f};
		startxml ();
		on dofolder (folder, folderurl) {
			<<Add every file in folder to the current sitemap, recursing into sub-folders unless flOneLevelOnly is set.
			on addfiletositemap (f) {
				if ctfilesinsitemap >= maxfilesinsitemap { //current sitemap is full, write it and start the next one
					writesitemap (true);
					startxml ()};
				ctfilesinsitemap++; //counted after the rollover, startxml resets the counter to 0
				add ("<url>"); indentlevel++;
				add ("<loc>" + encode (folderurl + fname) + "</loc>");
				add ("<lastmod>" + lastmod (f) + "</lastmod>");
				add ("</url>"); indentlevel--;
				ctfiles++};
			local (f, fname);
			scratchpad.sitemapfolder = folder; //debugging
			fileloop (f in folder) {
				if file.exists (f) { //we delete sitemap.xml if it's replaced with a numbered file, so f might not exist
					fname = file.filefrompath (f);
					if not (fname beginswith ".") {
						if file.isfolder (f) {
							if not flOneLevelOnly {
								dofolder (f, folderurl + string.mid (fname, 1, sizeof (fname) - 1) + "/")}} //drop the last char of the folder name before appending "/"
						else {
							if not ((fname beginswith "sitemap") and (fname endswith ".xml")) { //don't list our own output files
								addfiletositemap (f)}}}}}};
		dofolder (basefolder, baseurl);
		<<endxml ()
		bundle { //write the file
			if ctfiles == 0 {
				local (f = basefolder + "sitemap.xml");
				if file.exists (f) { //no files in the folder, delete the sitemap (if it exists)
					file.delete (f)}}
			else {
				if ctfilesinsitemap > 0 {
					writesitemap (ctsitemaps != 0)}}}};
			<<local (f = folder + sitemapfname)
			<<if ctfiles > 0
				<<writefile (f, xmltext)
				<<mapurls.[baseurl + sitemapfname] = f
			<<else
				<<if file.exists (f) //no files in the folder, delete the sitemap (if it exists)
					<<file.delete (f)
	if flsimple { //11/29/10 by DW
		maxfilesinsitemap = 50000; //See http://sitemaps.org/protocol.php
		addmaptofolder (sitefolder, false, baseurl)}
	else {
		maxfilesinsitemap = 500;
		addmaptofolder (sitefolder, true, baseurl); //files directly in the top-level folder
		fileloop (f in sitefolder) {
			if file.isfolder (f) {
				local (foldername = file.filefrompath (f) - pc);
				if not (foldername beginswith ".") { //same rule dofolder applies to nested folders
					msg ("Creating sitemap for folder: " + f);
					addmaptofolder (f, false, baseurl + foldername + "/")}}};
		scratchpad.mapurls = mapurls; //debugging
		bundle { //generate the index
			local (xmltext = "", indentlevel = 0, adrurl);
			on add (s) {
				xmltext = xmltext + string.filledstring ("\t", indentlevel) + s + "\r";};
			add ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			add (advert ());
			add ("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); indentlevel++;
			for adrurl in @mapurls {
				local (f = adrurl^, url = nameof (adrurl^));
				if file.exists (f) { //only list sitemaps that are still on disk
					add ("<sitemap>"); indentlevel++;
					add ("<loc>" + encode (url) + "</loc>");
					add ("<lastmod>" + lastmod (f) + "</lastmod>");
					add ("</sitemap>"); indentlevel--}};
			add ("</sitemapindex>"); indentlevel--;
			writefile (sitefolder + sitemapindexfname, xmltext)}}};
bundle { //test code
	<<Runs the generator in simple mode against a local copy of the listings.opml.org site.
	local (testfolder = "Ohio:Server Backups:static sites:Fresca:listings.opml.org:");
	local (testurl = "http://listings.opml.org/");
	sitemapGenerate (testfolder, testurl, true)}
	<<sitemapGenerate ("Ohio:scripting.com:", "http://scripting.com/")



This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index the code, so that when I want to look something up in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.