Wednesday, December 01, 2010 at 4:37 AM.
system.verbs.builtins.xml.sitemapgenerate
on sitemapGenerate (sitefolder, baseurl, flsimple=false) {
	<<Changes
		<<11/29/10; 6:34:10 PM by DW
			<<Optional boolean, flsimple, default false. If true, we generate a single sitemap.xml file, no siteindex, and don't split the sitemaps into small files. The caller is saying that it's a small site, and you can make it work with a simple sitemap.
		<<11/28/10; 4:10:17 PM by DW
			<<Lots of changes, optimizations. Runs a lot faster.
		<<11/28/10; 11:46:39 AM by DW
			<<Took the date/time out of the advert at the top of each file. It would cause them to change every time, defeating the whole lastmod thing.
		<<11/28/10; 8:40:04 AM by DW
			<<Simple sitemap generator. Each second-level folder gets a sitemap. The top-level folder gets a sitemap of the files it contains, and the top-level folder gets a sitemapindex that points to all the sitemaps.
	<<Generates sitemap files for the static site stored in sitefolder.
		<<sitefolder -- path of the folder on disk, ending with the path separator (file names are appended to it directly).
		<<baseurl -- URL that corresponds to sitefolder, ending with "/".
		<<flsimple -- if true, walk the whole tree into one sitemap.xml in sitefolder (rolling over to numbered files only past 50000 urls) and write no index. If false, write one sitemap per folder directly inside sitefolder, at most 500 urls per file, plus sitemapindex.xml.
	<<Review fixes
		<<The per-sitemap counter is now incremented after the rollover test. Before, the file that triggered a rollover was never counted, so later sitemaps could hold one url too many, and a final sitemap holding only that one file was never written.
		<<When a folder has no files we delete the stale "sitemap.xml". The code used to look for "sitemaps.xml", a name this script never writes.
		<<ctfiles is explicitly initialized to 0.
	<<NOTE(review): in simple mode a site with more than 50000 files gets numbered sitemaps but no index that points to them -- confirm whether that case matters.
	local (mapurls, pc = file.getpathchar (), f, maxfilesinsitemap, sitemapindexfname = "sitemapindex.xml");
	new (tabletype, @mapurls); //maps sitemap url -> path of the sitemap file on disk
	on writefile (f, s) {
		<<Write s to f, but only if the content differs from what's already there, so the file's modification date (and therefore lastmod in the index) doesn't change needlessly.
		file.surefilepath (f);
		if file.exists (f) {
			if string (file.readwholefile (f)) != s {
				file.writewholefile (f, s)}}
		else {
			file.writewholefile (f, s)}};
	on lastmod (f) {
		<<Returns the modification date of f as a "-" separated date string (presumably year-month-day -- confirm against file.getdatepath).
		return (file.getdatepath ("-", file.modified (f), false))};
	on advert () {
		<<The comment placed at the top of every generated file. No date/time in it, see the 11/28/10 change note.
		return ("\r<!-- Generated by \"" + (this - "system.verbs.builtins.") + "\" running in the OPML Editor. -->\r")};
	on encode (s) {
		<<Entity-encode s for use as XML text.
		return (xml.entityencode (s, true))};
	on addmaptofolder (basefolder, flOneLevelOnly, baseurl) {
		<<Write the sitemap(s) for basefolder, whose url is baseurl. If flOneLevelOnly is true, sub-folders are not visited.
		<<Every sitemap written is recorded in mapurls.
		local (ctsitemaps = 0, ctfilesinsitemap = 0); //numbered sitemaps written so far, urls in the sitemap being built
		local (xmltext = "", indentlevel = 0, ctfiles = 0); //ctfiles is the total number of urls added for this folder
		on add (s) {
			xmltext = xmltext + string.filledstring ("\t", indentlevel) + s + "\r";};
		on startxml () {
			<<Begin a new, empty sitemap in xmltext.
			xmltext = "";
			indentlevel = 0;
			ctfilesinsitemap = 0;
			add ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			add (advert ());
			add ("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); indentlevel++};
		on endxml () {
			<<The closing tag is added before the decrement, so it is indented one level deeper than its opening tag. Left as-is so the bytes of already-generated files don't change.
			add ("</urlset>"); indentlevel--};
		on writesitemap (flmorethanone) {
			<<Close the sitemap being built and write it to basefolder. If flmorethanone is true it gets the next numbered name (sitemap01.xml, sitemap02.xml, ...), otherwise sitemap.xml.
			local (f, fname);
			if flmorethanone {
				fname = "sitemap" + string.padwithzeros (++ctsitemaps, 2) + ".xml";
				bundle { //delete sitemap.xml in this folder, if it exists
					local (f = basefolder + "sitemap.xml");
					if file.exists (f) {
						file.delete (f)}}}
			else {
				fname = "sitemap.xml"};
			f = basefolder + fname;
			endxml ();
			writefile (f, xmltext);
			mapurls.[baseurl + fname] = f};
		startxml ();
		on dofolder (folder, folderurl) {
			<<Add every file in folder to the sitemap, recursing into sub-folders unless flOneLevelOnly is true.
			on addfiletositemap (f) {
				<<fname is dofolder's local, set by the loop below before we're called.
				if ctfilesinsitemap >= maxfilesinsitemap { //the current sitemap is full, write it and start a new one
					writesitemap (true);
					startxml ()};
				add ("<url>"); indentlevel++;
				add ("<loc>" + encode (folderurl + fname) + "</loc>");
				add ("<lastmod>" + lastmod (f) + "</lastmod>");
				add ("</url>"); indentlevel--;
				ctfilesinsitemap++; //count after the rollover, so the file we just added is counted in the new sitemap
				ctfiles++};
			local (f, fname);
			scratchpad.sitemapfolder = folder; //debugging
			fileloop (f in folder) {
				if file.exists (f) { //we delete sitemap.xml if it's replaced with a numbered file, so f might not exist
					fname = file.filefrompath (f);
					if not (fname beginswith ".") { //skip names that begin with a dot
						if file.isfolder (f) {
							if not flOneLevelOnly {
								<<the folder's name minus its last character (the trailing path separator), plus "/"
								dofolder (f, folderurl + string.mid (fname, 1, sizeof (fname) - 1) + "/")}}
						else {
							if not ((fname beginswith "sitemap") and (fname endswith ".xml")) { //don't list the sitemap files themselves
								addfiletositemap (f)}}}}}};
		dofolder (basefolder, baseurl);
		bundle { //write the file
			if ctfiles == 0 {
				local (f = basefolder + "sitemap.xml");
				if file.exists (f) { //no files in the folder, delete the sitemap (if it exists)
					file.delete (f)}}
			else {
				if ctfilesinsitemap > 0 {
					writesitemap (ctsitemaps != 0)}}}}; //numbered if earlier numbered sitemaps were written for this folder
	if flsimple { //11/29/10 by DW
		maxfilesinsitemap = 50000; //See http://sitemaps.org/protocol.php
		addmaptofolder (sitefolder, false, baseurl)}
	else {
		maxfilesinsitemap = 500;
		addmaptofolder (sitefolder, true, baseurl); //the files at the top level of the site
		fileloop (f in sitefolder) { //one sitemap (or numbered set) per folder directly inside sitefolder
			if file.isfolder (f) {
				local (foldername = file.filefrompath (f) - pc);
				msg ("Creating sitemap for folder: " + f);
				addmaptofolder (f, false, baseurl + foldername + "/")}};
		scratchpad.mapurls = mapurls; //debugging
		bundle { //generate the index
			local (xmltext = "", indentlevel = 0, adrurl);
			on add (s) {
				xmltext = xmltext + string.filledstring ("\t", indentlevel) + s + "\r";};
			add ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
			add (advert ());
			add ("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"); indentlevel++;
			for adrurl in @mapurls { //each item's name is the sitemap's url, its value is the file path
				local (f = adrurl^, url = nameof (adrurl^));
				if file.exists (f) {
					add ("<sitemap>"); indentlevel++;
					add ("<loc>" + encode (url) + "</loc>");
					add ("<lastmod>" + lastmod (f) + "</lastmod>");
					add ("</sitemap>"); indentlevel--}};
			add ("</sitemapindex>"); indentlevel--;
			writefile (sitefolder + sitemapindexfname, xmltext)}}};
bundle { //test code
<<Runs the generator in simple mode (flsimple is true) against a hard-coded folder and url. Change both to match your own site before running.
sitemapGenerate ("Ohio:Server Backups:static sites:Fresca:listings.opml.org:", "http://listings.opml.org/", true)}
<<sitemapGenerate ("Ohio:scripting.com:", "http://scripting.com/")
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look something up in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.