Tuesday, November 30, 2010 at 12:01 AM.
system.verbs.builtins.xml.sitemapGenerate
on sitemapGenerate (sitefolder, baseurl, flsimple=false) {
	<<Generate sitemaps.org 0.9 sitemap files for the static site stored in sitefolder.
		<<sitefolder -- path of the site's top-level folder. File names are appended directly to it, so it must end with the path separator.
		<<baseurl -- URL that corresponds to sitefolder. File and folder names are appended directly to it, so it must end with "/".
		<<flsimple -- if true, the whole folder tree goes into one sitemap.xml (split only past 50000 URLs) and no index is written. If false, the top-level folder and each second-level folder get their own sitemap(s), split every 500 URLs, and sitemapindex.xml in sitefolder points to all of them.
		<<Files are only rewritten when their contents change, so modification dates (and hence lastmod in the index) stay stable between runs.
	<<Changes
		<<Review fixes
			<<The per-sitemap counter is now incremented after the url is added. Before, the first url written after a split was not counted, so continuation sitemaps could hold one url too many, and a lone trailing url after a split was never written to disk.
			<<An empty folder now deletes "sitemap.xml" (the name we actually write), not "sitemaps.xml".
			<<Top-level folders whose names begin with "." are skipped when building per-folder sitemaps, the same rule dofolder applies at every other level.
		<<11/29/10; 6:34:10 PM by DW
			<<Optional boolean, flsimple, default false. If true, we generate a single sitemap.xml file, no siteindex, and don't split the sitemaps into small files. The call is saying that it's a small site, and you can make it work with a simple sitemap.
		<<11/28/10; 4:10:17 PM by DW
			<<Lots of changes, optimizations. Runs a lot faster.
		<<11/28/10; 11:46:39 AM by DW
			<<Took the date/time out of the advert at the top of each file. It would cause them to change every time, defeating the whole lastmod thing.
		<<11/28/10; 8:40:04 AM by DW
			<<Simple sitemap generator. Each second-level folder gets a sitemap. The top-level folder gets a sitemap of the files it contains, and the top-level folder gets a sitemapindex that points to all the sitemaps.
	local (mapurls, pc = file.getpathchar (), f, maxfilesinsitemap, sitemapindexfname = "sitemapindex.xml");
	new (tabletype, @mapurls); //maps sitemap url -> sitemap file path, consumed by the index generator
	on writefile (f, s) {
		<<Write s to f, but leave the file untouched if it already contains exactly s.
		file.surefilepath (f);
		if file.exists (f) {
			if string (file.readwholefile (f)) != s {
				file.writewholefile (f, s)}}
		else {
			file.writewholefile (f, s)}};
	on lastmod (f) {
		<<The file's modification date, formatted for a <lastmod> element.
		return (file.getdatepath ("-", file.modified (f), false))};
	on advert () {
		<<XML comment naming this script. Deliberately has no date/time, see the 11/28/10 change note.
		return ("\r<!-- Generated by \"" + (this - "system.verbs.builtins.") + "\" running in the OPML Editor. -->\r")};
	on encode (s) {
		return (xml.entityencode (s, true))};
	on addmaptofolder (basefolder, flOneLevelOnly, baseurl) {
		<<Write the sitemap(s) for basefolder and record each one in mapurls.
			<<flOneLevelOnly -- if true, only files directly in basefolder are listed; if false, sub-folders are walked recursively.
			<<A single file is called sitemap.xml. Once the url limit forces a split, the files are sitemap01.xml, sitemap02.xml, ... and any sitemap.xml is removed.
		local (ctsitemaps = 0, ctfilesinsitemap = 0);
		local (xmltext, indentlevel, ctfiles = 0);
		on add (s) {
			xmltext = xmltext + string.filledstring ("\t", indentlevel) + s + "\r";};
		on startxml () {
			<<Begin a fresh, empty sitemap document in xmltext.
			xmltext = "";
			indentlevel = 0;
			ctfilesinsitemap = 0;
			add ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			add (advert ());
			add ("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
			indentlevel++};
		on endxml () {
			<<The closing tag is emitted before the outdent, as it always has been, so files generated by earlier runs stay byte-identical and are not rewritten.
			add ("</urlset>");
			indentlevel--};
		on writesitemap (flmorethanone) {
			<<Close the current document, write it to disk and register it in mapurls.
			local (f, fname);
			if flmorethanone {
				fname = "sitemap" + string.padwithzeros (++ctsitemaps, 2) + ".xml";
				bundle { //delete sitemap.xml in this folder, if it exists
					local (f = basefolder + "sitemap.xml");
					if file.exists (f) {
						file.delete (f)}}}
			else {
				fname = "sitemap.xml"};
			f = basefolder + fname;
			endxml ();
			writefile (f, xmltext);
			mapurls.[baseurl + fname] = f};
		startxml ();
		on dofolder (folder, folderurl) {
			on addfiletositemap (f, fname) {
				<<Append one <url> entry. fname is passed in explicitly rather than read from the caller's locals.
				if ctfilesinsitemap >= maxfilesinsitemap { //current sitemap is full, flush it and start the next one
					writesitemap (true);
					startxml ()};
				add ("<url>");
				indentlevel++;
				add ("<loc>" + encode (folderurl + fname) + "</loc>");
				add ("<lastmod>" + lastmod (f) + "</lastmod>");
				add ("</url>");
				indentlevel--;
				ctfilesinsitemap++; //count only after the entry is in the document, startxml resets this to 0
				ctfiles++};
			local (f, fname);
			scratchpad.sitemapfolder = folder; //debugging
			fileloop (f in folder) {
				if file.exists (f) { //we delete sitemap.xml if it's replaced with a numbered file, so f might not exist
					fname = file.filefrompath (f);
					if not (fname beginswith ".") {
						if file.isfolder (f) {
							if not flOneLevelOnly {
								<<a folder's name ends with the path char, drop it and use "/" in the url
								dofolder (f, folderurl + string.mid (fname, 1, sizeof (fname) - 1) + "/")}}
						else {
							if not ((fname beginswith "sitemap") and (fname endswith ".xml")) { //don't list our own output
								addfiletositemap (f, fname)}}}}}};
		dofolder (basefolder, baseurl);
		<<endxml ()
		bundle { //write the file
			if ctfiles == 0 {
				local (f = basefolder + "sitemap.xml");
				if file.exists (f) { //no files in the folder, delete the sitemap (if it exists)
					file.delete (f)}}
			else {
				if ctfilesinsitemap > 0 {
					writesitemap (ctsitemaps != 0)}}}};
		<<local (f = folder + sitemapfname)
		<<if ctfiles > 0
			<<writefile (f, xmltext)
			<<mapurls.[baseurl + sitemapfname] = f
		<<else
			<<if file.exists (f) //no files in the folder, delete the sitemap (if it exists)
				<<file.delete (f)
	if flsimple { //11/29/10 by DW
		maxfilesinsitemap = 50000; //See http://sitemaps.org/protocol.php
		addmaptofolder (sitefolder, false, baseurl)}
	else {
		maxfilesinsitemap = 500;
		addmaptofolder (sitefolder, true, baseurl);
		fileloop (f in sitefolder) {
			if file.isfolder (f) {
				local (foldername = file.filefrompath (f) - pc);
				if not (foldername beginswith ".") { //skip hidden folders, as dofolder does
					msg ("Creating sitemap for folder: " + f);
					addmaptofolder (f, false, baseurl + foldername + "/")}}};
		scratchpad.mapurls = mapurls; //debugging
		bundle { //generate the index
			local (xmltext = "", indentlevel = 0, adrurl);
			on add (s) {
				xmltext = xmltext + string.filledstring ("\t", indentlevel) + s + "\r";};
			add ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			add (advert ());
			add ("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
			indentlevel++;
			for adrurl in @mapurls {
				local (f = adrurl^, url = nameof (adrurl^));
				if file.exists (f) { //a sitemap.xml recorded earlier may since have been replaced by numbered files
					add ("<sitemap>");
					indentlevel++;
					add ("<loc>" + encode (url) + "</loc>");
					add ("<lastmod>" + lastmod (f) + "</lastmod>");
					add ("</sitemap>");
					indentlevel--}};
			add ("</sitemapindex>");
			indentlevel--;
			writefile (sitefolder + sitemapindexfname, xmltext)}}};
bundle { //test code
	sitemapGenerate ("Ohio:Server Backups:static sites:Fresca:listings.opml.org:", "http://listings.opml.org/", true)}
	<<sitemapGenerate ("Ohio:scripting.com:", "http://scripting.com/")
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.