Monday, November 08, 2010 at 12:07 AM.

system.verbs.builtins.xml.rss.getFeedItems

on getFeedItems (url, adritems, adrfeedinfo, adrtext=nil) {
	<<Read an RSS, RDF or Atom feed and return its items plus basic info about the feed.
		<<url -- address of the feed. It is fetched over HTTP (following up to 5 redirects) unless adrtext is supplied.
		<<adritems -- address at which a new table is created. Items are stored under 3-digit zero-padded names ("001", "002", ...), newest first. RSS/RDF items are copied as compiled by xml.compile; Atom entries are converted to RSS-style sub-tables with title, description, pubDate, link and guid.
		<<adrfeedinfo -- address of the feed info table, created if it doesn't already exist. Receives title, link, description (plus category, cloud and image for RSS/RDF when present).
		<<adrtext -- optional address of a string holding the feed text. If not nil, that text is used and the url is not read.
		<<Returns true. Calls scriptError if the text has no top-level rss, feed or RDF element.
	<<Changes
		<<5/6/10; 2:32:32 AM by DW
			<<A couple of fixes for stackoverflow.com Atom feeds. 
			<<Some Atom feeds don't use "content" to store the equiv of an RSS description, they use "summary" -- added a fix that accounts for this. Example -- stackoverflow.com. Also added a break in the loop that looks for the feed-level link, so we use the first one found as opposed to the last one. 
			<<Also on stackoverflow.com, they omit the "type" attribute on link elements. It seems they're the only ones who do this. Oy. Allow for it.
		<<2/4/10; 4:41:20 PM by DW
			<<Handle RDF-formatted feeds.
		<<11/5/09; 4:44:18 PM by DW
			<<Add optional parameter, adrtext. If not nil, use that text instead of reading it from the url. This saves multiple readings if we're sniffing at the text to figure out what it is.
		<<10/18/09; 6:36:06 PM by DW
			<<Although channel-level description is a required element, some feeds don't have it, so we default to the empty string if it's not present.
		<<9/17/09; 8:37:28 PM by DW
			<<Get the image from the feed, if it has one.
		<<9/16/09; 8:30:53 AM by DW
			<<Fix a dumb typo in the error message I just fixed. :-)
		<<9/16/09; 8:23:27 AM by DW
			<<Simplified the most common error message to remove these words: XML, RSS, Atom.
		<<9/16/09; 7:57:36 AM by DW
			<<Stop looking for the <lifeline> top-level element -- it didn't go anywhere. 
		<<9/15/09; 11:01:35 AM by DW
			<<Improve the error message if it's neither an RSS or Atom feed. Get the <category> element if one is present.
		<<9/4/09; 7:04:10 PM by DW
			<<Now supports Atom feeds. 
		<<7/21/09; 2:20:54 PM by DW
			<<Handle experimental "lifeLine" feeds.
		<<7/17/09; 11:09:04 AM by DW
			<<Cloud support. Implement a new sub-table of feedInfo table for each feed, cloud. It's present only if the feed has a cloud element.
		<<6/20/09; 7:40:14 PM by DW
			<<Run the text through xml.rss.data.replaceTable before compiling.
		<<6/16/09; 11:14:06 AM by DW
			<<Decode the entities in the strings. Don't create the feedInfo table if it's already been created.
		<<2/20/08; 10:40:21 AM by DW
			<<Created. A bottleneck for RSS 2.0 feed reading. Return most basic info about the feed, and a table containing the items sorted chronologically based on pubdate (if available), newest first.
	local (xstruct, adrrss = nil, adratomfeed = nil, adrrdf = nil, flrss = true, flatom = false, flrdf = false, adrchannel);
	on decode (s) {
		return (xml.entitydecode (s, flAlphaEntities:true))};
	on getlinkatt (adrlink, name, dflt) { //attribute of an Atom link element, or dflt if the attribute is missing
		<<Shared by the feed-level and entry-level link loops, so both tolerate link elements that omit "type" (stackoverflow.com) or "rel" (the Atom spec says a missing rel means "alternate").
		try {
			return (xml.getattributevalue (adrlink, name))}
		else {
			return (dflt)}};
	bundle { //read feed into xstruct
		local (s);
		bundle { //get the text, 11/5/09 by DW
			if adrtext == nil {
				local (urllist);
				try {
					urllist = string.urlsplit (url)}
				else { //retry with a trailing slash added to the url
					urllist = string.urlsplit (url + "/")};
				s =  tcp.httpClient (server:urllist [2], path:urllist [3], ctFollowRedirects:5, flMessages:false);
				s = string.httpResultSplit (s)}
			else {
				s = string (adrtext^)}};
		s = string.multiplereplaceall (s, @xml.rss.data.replaceTable, false); //6/20/09 by DW
		xml.compile (s, @xstruct)};
		<<scratchpad.xstructFeedItems = xstruct
	bundle { //fill feedinfo table, set adrchannel
		if not defined (adrfeedinfo^) {
			new (tabletype, adrfeedinfo)};
		bundle { //accept either <rss> or <feed> or <RDF>
			try {
				adrrss = xml.getaddress (@xstruct, "rss")}
			else {
				try {
					adratomfeed = xml.getaddress (@xstruct, "feed");
					flrss = false;
					flatom = true}
				else {
					try {
						adrrdf = xml.getaddress (@xstruct, "RDF"); //2/4/10 by DW
						adrrss = adrrdf; //RDF is handled by the RSS code, the channel is looked up inside the RDF element
						flrss = true;
						flrdf = true}
					else {
						scriptError ("Can't process the file because it doesn't appear to be a feed.")}}}};
		if flrss {
			adrchannel = xml.getaddress (adrrss, "channel");
			adrfeedinfo^.title = decode (xml.getvalue (adrchannel, "title"));
			adrfeedinfo^.link = decode (xml.getvalue (adrchannel, "link"));
			bundle { //description, 10/18/09 by DW
				try {
					adrfeedinfo^.description = decode (xml.getvalue (adrchannel, "description"))}
				else {
					adrfeedinfo^.description = ""}};
			bundle { //get category, 9/15/09 by DW
				try {
					adrfeedinfo^.category = decode (xml.getvalue (adrchannel, "category"))}
				else {
					adrfeedinfo^.category = ""}};
			bundle { //cloud support, 7/17/09 by DW
				try { //no else -- if there's no cloud element, no cloud sub-table is created
					local (adrcloud = xml.getaddress (adrchannel, "cloud"));
					on getatt (name) {
						try {
							return (xml.getattributevalue (adrcloud, name))}
						else {
							return ("")}};
					new (tabletype, @adrfeedinfo^.cloud);
					adrfeedinfo^.cloud.domain = getatt ("domain");
					adrfeedinfo^.cloud.path = getatt ("path");
					adrfeedinfo^.cloud.port = getatt ("port");
					adrfeedinfo^.cloud.protocol = getatt ("protocol");
					adrfeedinfo^.cloud.registerProcedure = getatt ("registerProcedure")}};
			bundle { //get image, 9/17/09 by DW
				try { //no else -- if there's no image element, no image sub-table is created
					local (adrimage = xml.getaddress (adrchannel, "image"));
					on getval (name, type=stringtype) {
						try {
							if type == longtype {
								return (number (xml.getvalue (adrimage, name)))}
							else {
								return (decode (xml.getvalue (adrimage, name)))}}
						else {
							if type == longtype {
								return (0)}
							else {
								return ("")}}};
					new (tabletype, @adrfeedinfo^.image);
					adrfeedinfo^.image.url = getval ("url");
					adrfeedinfo^.image.title = getval ("title");
					adrfeedinfo^.image.link = getval ("link");
					adrfeedinfo^.image.height = getval ("height", longtype);
					adrfeedinfo^.image.width = getval ("width", longtype)}}}};
	if flrss { //fill the items table
		local (sorttable, adritemscontainer);
		bundle { //fill sorttable
			local (i, adritem, now = clock.now (), pubdate);
			adritemscontainer = adrchannel;
			if flrdf { //for RDF the items are looked for directly under the RDF element, not under channel
				adritemscontainer = adrrdf};
			new (tabletype, @sorttable);
			for i = 1 to sizeof (adritemscontainer^) {
				adritem = @adritemscontainer^ [i];
				if nameof (adritem^) endswith "item" {
					try {
						pubdate = date (xml.getvalue (adritem, "pubDate"))}
					else { //no usable pubDate, treat the item as if it were published now
						pubdate = now};
					sorttable.[string (i)] = pubdate}}; //name is the item's index in the container, value is its date
			local (oldtarget = target.set (@sorttable));
			table.sortby ("Value");
			target.set (oldtarget)};
			<<scratchpad.sorrtable = sorttable
		local (i, adritem, ct=0, ix);
		new (tabletype, adritems);
		for i = sizeof (sorttable) downto 1 { //walk the date-sorted table backwards, so the newest item comes first
			ix = number (nameof (sorttable [i]));
			adritem = @adritemscontainer^ [ix];
			adritems^.[string.padwithzeros (++ct, 3)] = adritem^};
		return (true)}; //finished dealing with RSS
	if flatom {
		adrfeedinfo^.title = decode (xml.getvalue (adratomfeed, "title"));
		bundle { //get description from subtitle, if available
			try {
				adrfeedinfo^.description = decode (xml.getvalue (adratomfeed, "subtitle"))}
			else {
				adrfeedinfo^.description = ""}};
		bundle { //loop to get the link
			local (adr);
			for adr in adratomfeed {
				if nameof (adr^) endswith "link" {
					if getlinkatt (adr, "rel", "alternate") == "alternate" {
						if getlinkatt (adr, "type", "text/html") == "text/html" {
							adrfeedinfo^.link = decode (xml.getattributevalue (adr, "href"));
							break}}}}}; //5/6/10 by DW
		bundle { //loop to get items
			local (sorttable, now = clock.now ());
			bundle { //fill sorttable
				local (i, adr, pubdate);
				new (tabletype, @sorttable);
				for i = 1 to sizeof (adratomfeed^) {
					adr = @adratomfeed^ [i];
					if nameof (adr^) endswith "entry" {
						try {
							pubdate = date.iso8601StringToDate (xml.getvalue (adr, "published"))}
						else { //no usable published element, treat the entry as if it were published now
							pubdate = now};
						sorttable.[string (i)] = pubdate}}; //name is the entry's index in the feed, value is its date
				local (oldtarget = target.set (@sorttable));
				table.sortby ("Value");
				target.set (oldtarget)};
				<<scratchpad.sorrtable = sorttable
			local (i, adritem, ct=0, ix, adrsub);
			new (tabletype, adritems);
			for i = sizeof (sorttable) downto 1 { //newest first
				ix = number (nameof (sorttable [i]));
				adritem = @adratomfeed^ [ix];
				adrsub = @adritems^.[string.padwithzeros (++ct, 3)];
				new (tabletype, adrsub);
				xml.addvalue (adrsub, "title", xml.getvalue (adritem, "title"));
				bundle { //get the description, 5/6/10 by DW
					local (s);
					try {
						s = xml.getvalue (adritem, "content")}
					else {
						try {
							s = xml.getvalue (adritem, "summary")}
						else { //entry has neither content nor summary -- use the empty string, as we do for a missing feed-level description, rather than failing the whole read
							s = ""}};
					xml.addvalue (adrsub, "description", s)};
				xml.addvalue (adrsub, "pubDate", date.netstandardstring (sorttable [i]));
				bundle { //loop to get the link
					local (adr);
					for adr in adritem {
						if nameof (adr^) endswith "link" {
							if getlinkatt (adr, "rel", "alternate") == "alternate" {
								if getlinkatt (adr, "type", "text/html") == "text/html" { //5/6/10 by DW -- a missing type counts as text/html
									xml.addvalue (adrsub, "link", xml.getattributevalue (adr, "href"))}}}}};
				bundle { //set guid
					local (adrguid = xml.addtable (adrsub, "guid"));
					new (tabletype, @adrguid^.["/atts"]);
					adrguid^.["/atts"].isPermaLink = false;
					adrguid^.["/pcdata"] = xml.getvalue (adritem, "id")}};
			return (true)}}}; //finished dealing with Atom
bundle { //test code
	<<Reads one sample feed into scratchpad.feed.items and scratchpad.feed.info, creating scratchpad.feed if needed.
	<<Other feeds to test against -- to try one, put it in the local statement below.
		<<http://www.npr.org/rss/podcast.php?id=13
		<<http://feeds.feedburner.com/inquisitr
		<<http://www.nytimes.com/services/xml/rss/userland/HomePage.xml
		<<http://static.lifeliner.org/dave/rss.xml
		<<http://annarbor.com/rss.xml
		<<config.lifeLiner.stats.rssUrl -- an object database value, not a literal. Kept as a comment because evaluating it fails where that table doesn't exist.
		<<http://halleyscomment.blogspot.com/feeds/posts/default -- atom feed
		<<http://news.google.com/news?ned=us&topic=h&output=atom
		<<http://howto.disqus.com/howto_rebooting_the_rss_cloud_88/latest.rss
		<<http://static.flickrfan.org/afp/fashion.xml
		<<http://static.lifeliner.org/lifeliner/rss.xml
		<<http://www.lockhartsteele.com/below14/index.rdf
	local (url = "http://stackoverflow.com/feeds/tag/rss");
	if not defined (scratchpad.feed) {
		new (tabletype, @scratchpad.feed)};
	getfeeditems (url, @scratchpad.feed.items, @scratchpad.feed.info)}



This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.