Monday, November 08, 2010 at 12:04 AM.
system.verbs.builtins.op.rssToOutline
on rssToOutline (xmltext, adroutline, url="") {
	<<Build an outline at adroutline from the RSS (or RDF) feed contained in xmltext.
	<<Params:
	<<xmltext -- the XML source of the feed; it is compiled with xml.compile.
	<<adroutline -- address at which a new outline is created and filled in.
	<<url -- optional; if nonempty it is saved as the xmlUrl attribute of the main headline.
	<<The channel title becomes the main headline, carrying the attributes type, version, title, htmlUrl, xmlUrl, description and language.
	<<Each item becomes a headline under it. An item's description, if present, is inserted as a sub-headline of the item; its link, if one can be found, is saved in the item headline's url attribute (type "link").
	<<On the way out the outline window is positioned and sized, and the previous target is restored.
	<<Changes:
	<<Review fixes (undated)
	<<Restored the "&apos;" and "&quot;" literals in decode; they had been entity-decoded into a no-op replacement and an invalid string literal.
	<<The version attribute is now read after adrrss has been set. Before, the lookup always failed inside its try, so atts.version was never filled in for RSS feeds.
	<<The encoding check read an undefined name (xmlAdr instead of adrxml), so it always failed and ISO-8859-1 feeds were converted anyway. It now reads the encoding attribute of the XML declaration through xml.getAttribute.
	<<The UTF-8 conversion is skipped for item fields that are missing, so the "no title" and "no description" fallbacks still see nil.
	<<Removed the unused local ct.
	<<2/17/06; 3:05:41 PM by DM
	<<Added conversion from UTF-8 to ANSI unless ISO-8859-1 is specified in the XML declaration because UTF-8 is the default encoding for XML.
	<<Changed the display of item descriptions so that they display as sub-items of the title field (or some substitute where there is no title field). This keeps all the paragraphs together.
	<<bundle // old code
	<<on rssToOutline (xmltext, adroutline, url="")
	<<Changes:
	<<11/23/01; 3:22:28 PM by JES
	<<Re-checked in to fix a problem (also now fixed) caused by userland.cleanPikeRoot.
	<<10/11/01; 2:40:28 PM by PBS
	<<If no url item is present, try to get it from the description.
	<<8/10/01; 7:21:39 PM by JES
	<<Support RSS 0.92.
	<<9/27/00 at 6:23:24 PM by JES
	<<Commented debugging code.
	<<9/27/00 at 6:03:59 PM by JES
	<<Decode XML entities in URLs when setting the link node's URL attribute.
	<<8/16/00 at 2:24:14 PM by DW
	<<Parse the xmltext as a RSS 0.91 channel, into the outline. If url is nonempty, add it as an attribute to the main headline.
	<<local (xtable)
	<<new (outlinetype, adroutline)
	<<xml.compile (xmltext, @xtable)
	<<
	<<on decode (s)
	<<s = string.replaceall (s, "&apos;", "'")
	<<s = string.replaceall (s, "&quot;", "\"")
	<<s = string.replaceAll (s, "\r\n", "\r")
	<<s = string.replaceAll (s, "\n", "\r")
	<<s = string.replaceAll (s, "\t", "")
	<<return (xml.entityDecode (s))
	<<local (version)
	<<try
	<<version = xml.getAttribute (adrRss, "version")^
	<<local (adrrss)
	<<try
	<<adrrss = xml.getAddress (@xtable, "rss")
	<<else //it may be RDF
	<<adrrss = xml.getAddress (@xtable, "RDF")
	<<version = "RSS1"
	<<local (adrchannel = xml.getAddress (adrRss, "channel"))
	<<local (title = decode (xml.getValue (adrchannel, "title")))
	<<local (channeldescription = decode (xml.getValue (adrchannel, "description")))
	<<local (channellanguage, channellink)
	<<try {channellink = decode (xml.getValue (adrchannel, "link"))}
	<<try {channellanguage = decode (xml.getValue (adrchannel, "language"))}
	<<
	<<local (oldtarget = target.set (adroutline))
	<<op.setlinetext (title)
	<<local (atts)
	<<new (tabletype, @atts)
	<<atts.type = "rss"
	<<atts.version = version
	<<atts.title = title
	<<if channellink != nil
	<<atts.htmlUrl = channellink
	<<if url != ""
	<<atts.xmlUrl = url
	<<atts.description = channeldescription
	<<atts.language = channellanguage
	<<op.attributes.addgroup (@atts)
	<<bundle //get the items
	<<local (itemlist, item, ct = 1, insertdir = right)
	<<if version == "RSS1"
	<<itemlist = xml.getAddressList (adrrss, "item")
	<<else
	<<itemlist = xml.getAddressList (adrchannel, "item")
	<<for item in itemlist
	<<local (url, title, description)
	<<try //url is optional in RSS 0.92
	<<url = decode (xml.getValue (item, "link"))
	<<try //title is optional in RSS 0.92
	<<title = decode (xml.getValue (item, "title"))
	<<try //description is optional in RSS 0.92
	<<description = decode (xml.getValue (item, "description"))
	<<
	<<if description == nil
	<<if title == nil //no title or description -- insert the link if there is one
	<<if url == nil
	<<continue
	<<op.insert (url, insertdir)
	<<else
	<<op.insert (title, insertdir)
	<<else
	<<bundle //make the description look nice in an outline
	<<bundle //this code is too slow -- don't bother to strip markup
	<<on replaceTag (tagName, replaceStartTagWith, replaceEndDagWith = "")
	<<local (searchString = "<" + string.lower (tagName))
	<<local (ct = sizeOf (searchString))
	<<local (descriptionSize = sizeOf (description))
	<<local (ix = descriptionSize - ct)
	<<while ix > 0
	<<if string.lower (string.mid (description, ix, ct)) == searchString //this is a hit
	<<local (ixend = ix + ct)
	<<while ixend <= descriptionSize
	<<if description[ixend] == '>' //delete up to here
	<<description = string.delete (description, ix, ixend - ix + 1)
	<<descriptionSize = sizeOf (description)
	<<break
	<<ixend++
	<<ix--
	<<replaceTag ("br", "\r")
	<<replaceTag ("p", "\r")
	<<replaceTag ("li", "\t\r")
	<<replaceTag ("blockquote", "")
	<<replaceTag ("ol", "")
	<<replaceTag ("ul", "")
	<<description = string.replaceAll (description, "<br>", "\r", false)
	<<description = string.replaceAll (description, "<p>", "\r", false)
	<<description = string.replaceAll (description, "</p>", "", false)
	<<while description contains "\r\r"
	<<description = string.replaceAll (description, "\r\r", "\r")
	<<description = string.trimWhitespace (description)
	<<if title == nil
	<<op.insert (description, insertdir)
	<<else
	<<op.insert (title + " -- " + description, insertdir)
	<<
	<<if url == nil //PBS 10/11/01: try to get it from the description
	<<if description != nil
	<<local (ix = string.patternMatch ("<a ", string.lower (description))) //find first link
	<<if ix > 0
	<<local (s = string.mid (description, ix, infinity))
	<<ix = string.patternMatch (">", string.lower (s)) //find end of first link
	<<if ix > 0
	<<s = string.mid (s, 1, ix - 1) //drop trailing >
	<<s = s + "/>" //make it XML legal, as in <a href="foo"/>
	<<local (t) //table will contain tag as compiled XML
	<<new (tableType, @t)
	<<try //compile it as XML, then pull out the href attribute
	<<xml.compile (s, @t)
	<<url = t [1].["/atts"].href //get href attribute
	<<
	<<if url != nil
	<<local (atts)
	<<new (tabletype, @atts)
	<<atts.type = "link"
	<<atts.url = url
	<<op.attributes.addgroup (@atts)
	<<
	<<insertdir = down
	<<
	<<op.firstsummit ()
	<<window.setposition (adroutline, 75, 125)
	<<window.setsize (adroutline, 400, 450)
	<<target.set (oldtarget)
	<<11/23/01; 3:22:28 PM by JES
	<<Re-checked in to fix a problem (also now fixed) caused by userland.cleanPikeRoot.
	<<10/11/01; 2:40:28 PM by PBS
	<<If no url item is present, try to get it from the description.
	<<8/10/01; 7:21:39 PM by JES
	<<Support RSS 0.92.
	<<9/27/00 at 6:23:24 PM by JES
	<<Commented debugging code.
	<<9/27/00 at 6:03:59 PM by JES
	<<Decode XML entities in URLs when setting the link node's URL attribute.
	<<8/16/00 at 2:24:14 PM by DW
	<<Parse the xmltext as a RSS 0.91 channel, into the outline. If url is nonempty, add it as an attribute to the main headline.
	local (xtable);
	new (outlinetype, adroutline);
	xml.compile (xmltext, @xtable);
	on decode (s) {
		<<Return s with &apos; and &quot; replaced by the literal characters, every line break turned into a single return, tabs removed, and the remaining entities decoded by xml.entityDecode.
		s = string.replaceall (s, "&apos;", "'");
		s = string.replaceall (s, "&quot;", "\"");
		s = string.replaceAll (s, "\r\n", "\r");
		s = string.replaceAll (s, "\n", "\r");
		s = string.replaceAll (s, "\t", "");
		return (xml.entityDecode (s))};
	local (adrrss, version, flRdf = false);
	try {
		adrrss = xml.getAddress (@xtable, "rss")}
	else { //it may be RDF
		adrrss = xml.getAddress (@xtable, "RDF");
		version = "RSS1";
		flRdf = true};
	if not flRdf { //the version attribute can only be looked up once adrrss is known
		try { //leave version unset if the rss element has no version attribute
			version = xml.getAttribute (adrrss, "version")^}};
	local (flISO = false); // 2/17/06; 3:05:41 PM by DM
	try { //no XML declaration, or no encoding attribute, leaves flISO false: the text is treated as UTF-8
		local (adrxml = xml.getAddress (@xtable, "?xml"));
		local (encoding = string.lower (xml.getAttribute (adrxml, "encoding")^));
		if encoding == "iso-8859-1" {
			flISO = true}};
	local (adrchannel = xml.getAddress (adrrss, "channel"));
	local (title = decode (xml.getValue (adrchannel, "title")));
	local (channeldescription = decode (xml.getValue (adrchannel, "description")));
	local (channellanguage, channellink);
	try {channellink = decode (xml.getValue (adrchannel, "link"))}; //optional
	try {channellanguage = decode (xml.getValue (adrchannel, "language"))}; //optional
	if not flISO { // 2/17/06; 3:05:41 PM by DM
		title = string.utf8toAnsi (title);
		channeldescription = string.utf8toAnsi (channeldescription)};
	local (oldtarget = target.set (adroutline));
	op.setlinetext (title);
	local (atts);
	new (tabletype, @atts);
	atts.type = "rss";
	atts.version = version;
	atts.title = title;
	if channellink != nil {
		atts.htmlUrl = channellink};
	if url != "" {
		atts.xmlUrl = url};
	atts.description = channeldescription;
	atts.language = channellanguage;
	op.attributes.addgroup (@atts);
	bundle { //get the items
		local (itemlist, item, insertdir = right); //the first item goes to the right of the main headline, the rest go below it
		if version == "RSS1" {
			itemlist = xml.getAddressList (adrrss, "item")}
		else {
			itemlist = xml.getAddressList (adrchannel, "item")};
		for item in itemlist {
			local (url, title, description, pubDate); // 2/17/06; 3:05:41 PM by DM
			try { //url is optional in RSS 0.92
				url = decode (xml.getValue (item, "link"))};
			try { //title is optional in RSS 0.92
				title = decode (xml.getValue (item, "title"))};
			try { //description is optional in RSS 0.92
				description = decode (xml.getValue (item, "description"))};
			try { // pubDate
				pubDate = decode (xml.getValue (item, "pubDate"))};
			if not flISO { // 2/17/06; 3:05:41 PM by DM
				if title != nil { //only convert what the item actually has, so the nil tests below keep working
					title = string.utf8toAnsi (title)};
				if description != nil {
					description = string.utf8toAnsi (description)}};
			if description == nil {
				if title == nil { //no title or description -- insert the link if there is one
					if url == nil {
						continue};
					op.insert (url, insertdir)}
				else {
					op.insert (title, insertdir)}}
			else {
				bundle { //make the description look nice in an outline
					<<bundle //this code is too slow -- don't bother to strip markup
					<<on replaceTag (tagName, replaceStartTagWith, replaceEndDagWith = "")
					<<local (searchString = "<" + string.lower (tagName))
					<<local (ct = sizeOf (searchString))
					<<local (descriptionSize = sizeOf (description))
					<<local (ix = descriptionSize - ct)
					<<while ix > 0
					<<if string.lower (string.mid (description, ix, ct)) == searchString //this is a hit
					<<local (ixend = ix + ct)
					<<while ixend <= descriptionSize
					<<if description[ixend] == '>' //delete up to here
					<<description = string.delete (description, ix, ixend - ix + 1)
					<<descriptionSize = sizeOf (description)
					<<break
					<<ixend++
					<<ix--
					<<replaceTag ("br", "\r")
					<<replaceTag ("p", "\r")
					<<replaceTag ("li", "\t\r")
					<<replaceTag ("blockquote", "")
					<<replaceTag ("ol", "")
					<<replaceTag ("ul", "")
					description = string.replaceAll (description, "<br>", "\r", false);
					description = string.replaceAll (description, "<p>", "\r", false);
					description = string.replaceAll (description, "</p>", "", false);
					while description contains "\r\r" {
						description = string.replaceAll (description, "\r\r", "\r")};
					description = string.trimWhitespace (description)};
				if title == nil { // 2/17/06; 3:05:41 PM by DM - changed
					if pubDate == nil {
						op.insert ("Untitled item", insertdir)}
					else {
						op.insert (pubDate, insertdir)};
					op.insert (description, right);
					op.go (left, 1)} //back to the item headline, so the link attributes land on it
				else { // 2/17/06; 3:05:41 PM by DM - changed
					<<op.insert (title + " -- " + description, insertdir)
					op.insert (title, insertdir);
					op.insert (description, right);
					op.go (left, 1)}}; //back to the item headline, so the link attributes land on it
			if url == nil { //PBS 10/11/01: try to get it from the description
				if description != nil {
					local (ix = string.patternMatch ("<a ", string.lower (description))); //find first link
					if ix > 0 {
						local (s = string.mid (description, ix, infinity));
						ix = string.patternMatch (">", string.lower (s)); //find end of first link
						if ix > 0 {
							s = string.mid (s, 1, ix - 1); //drop trailing >
							s = s + "/>"; //make it XML legal, as in <a href="foo"/>
							local (t); //table will contain tag as compiled XML
							new (tableType, @t);
							try { //compile it as XML, then pull out the href attribute
								xml.compile (s, @t);
								url = t [1].["/atts"].href}}}}}; //get href attribute
			if url != nil {
				local (atts);
				new (tabletype, @atts);
				atts.type = "link";
				atts.url = url;
				op.attributes.addgroup (@atts)};
			insertdir = down}};
	op.firstsummit ();
	window.setposition (adroutline, 75, 125);
	window.setsize (adroutline, 400, 450);
	target.set (oldtarget)}
<<bundle //test code
<<local (url = "http://www.palmstation.com/palmstation.rdf")
<<local (s = tcp.httpReadUrl (url))
<<wp.newtextobject (s, @scratchpad.rsstext)
<<rssToOutline (string (scratchpad.rsstext), @scratchpad.rssOutline, url)
<<edit (@scratchpad.rssOutline)
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.