Monday, November 08, 2010 at 12:04 AM.
system.verbs.builtins.mainResponder.search.server.getSnippet
on getSnippet (htmltext, words, flHtmlSnippets=true) {
	<<Changes
		<<8/7/02; 7:15:42 PM by JES
			<<Created. Take some html text and search terms and return a snippet of sample text from the page which contains the search terms, ala Google's snippets.
	<<Params
		<<htmltext -- the HTML source of the page to take excerpts from.
		<<words -- the search terms, separated by spaces. Double-quote characters are removed before the terms are used.
		<<flHtmlSnippets -- when true (the default), search terms and ellipses in the result are wrapped in <b> tags.
	<<Returns
		<<One string made of the excerpts for the first three search terms, each followed by " ...". Terms that do not occur in the text contribute nothing.
	<<Review notes
		<<A snippet that begins at the very start of the text no longer loses its first word or gets a leading ellipsis.
		<<The lowercase copy of the text is derived from the stripped text, so offsets found in one are always valid in the other, and markup is stripped once instead of twice.
		<<Search terms are emboldened in a single pass over the untagged snippet text, so a term such as "b" can no longer match inside markup inserted for an earlier term, and a "<" in the text is left alone.
		<<Empty terms (e.g. from doubled spaces in words) and terms that are not found are skipped instead of adding stray spaces.
	words = string.replaceAll (words, "\"", "");
	<<NOTE(review): the published listing showed a space being replaced by a space here, which is a no-op. Presumably the original pattern was the "&nbsp;" entity, decoded when the listing was rendered -- confirm against the object database.
	htmltext = string.replaceAll (htmltext, "&nbsp;", " ");
	bundle { //strip everything before <body>, if present
		local (ixbody = string.patternMatch ("<body", string.lower (htmltext)));
		if ixbody > 0 {
			htmltext = string.delete (htmltext, 1, ixbody - 1)}};
	htmltext = searchEngine.stripMarkup (htmltext);
	local (lowertext = string.lower (htmltext)); //lowercased after stripping, so it lines up with htmltext character for character
	local (flFirstSnippet = true);
	on getOneSnippet (word) {
		<<Return an excerpt of htmltext around the first occurrence of word, followed by " ...", or the empty string if word is empty or does not occur.
		local (lowerword = string.lower (word));
		if sizeOf (lowerword) == 0 { //nothing to search for
			return ("")};
		local (ixword = string.patternMatch (lowerword, lowertext));
		if ixword == 0 {
			return ("")};
		local (i, ix = ixword, flStartEllipses = true);
		for i = 1 to 50 { //try to find the start of the sentence
			if ix == 1 { //reached the start of the text, so nothing was cut off
				flStartEllipses = false;
				break};
			case htmltext[ix] {
				'.';
				'!';
				'?';
				':';
				')';
				'\'';
				'\"';
				']' {
					ix++; //start just after the punctuation
					flStartEllipses = false;
					break}};
			ix--};
		local (s = string.trimWhiteSpace (string.mid (htmltext, ix, 75 + i))); //75 characters plus however far back we scanned
		if flStartEllipses { //the excerpt starts mid-sentence, so drop the possibly partial first word
			local (firstword = string.nthField (s, " ", 1));
			s = string.delete (s, 1, sizeOf (firstword))};
		local (lastword = string.nthField (s, " ", string.countFields (s, " "))); //the last word may be cut off too, drop it
		s = string.trimWhiteSpace (string.mid (s, 1, sizeOf (s) - sizeOf (lastword)));
		if flStartEllipses and flFirstSnippet { //later snippets follow the trailing ellipsis of the previous one
			s = "... " + s};
		flFirstSnippet = false;
		return (s + " ...")};
	local (snippets = "", snippet);
	local (ctwords = string.countFields (words, " "));
	local (i);
	for i = 1 to ctwords { //get snippets for the first 3 search terms
		if i > 3 { //only do first three search terms
			break};
		snippet = getOneSnippet (string.nthField (words, " ", i));
		if sizeOf (snippet) > 0 {
			snippet = snippet + " ";
			if not (snippets contains string.mid (snippet, 1, 30)) { //skip an excerpt we already have
				snippets = snippets + snippet}}};
	if flHtmlSnippets {
		bundle { //embolden search terms
			local (lowersnippet = string.lower (snippets), lowerwords = string.lower (words));
			local (marked = "", ix = 1, ctchars = sizeOf (snippets));
			while ix <= ctchars {
				local (ctmatch = 0, j);
				for j = 1 to ctwords { //find the longest search term that starts at this position
					local (lowerword = string.nthField (lowerwords, " ", j));
					local (ctword = sizeOf (lowerword));
					if ctword > ctmatch {
						if string.mid (lowersnippet, ix, ctword) == lowerword {
							ctmatch = ctword}}};
				if ctmatch > 0 { //wrap the matched text, keeping its original capitalization
					marked = marked + "<b>" + string.mid (snippets, ix, ctmatch) + "</b>";
					ix = ix + ctmatch}
				else {
					marked = marked + string.mid (snippets, ix, 1);
					ix++}};
			snippets = marked};
		snippets = string.replaceAll (snippets + " ", "... ", "<b>...</b> ")}; //make ellipses more visible
	return (string.popTrailing (snippets, " "))}
<<bundle //test code
	<<bundle //show snippet
		<<local (url = "http://127.0.0.1:8080/users/0000003/2002/07/11.html")
		<<local (snippet = getSnippet (pageinfo.[url].text, "john"))
		<<dialog.notify (snippet)
	<<local (folder = file.folderFromPath (frontier.getProgramPath ()))
	<<local (s = file.readWholeFile (folder + "Guest Databases:www:users:0000003:index.html"))
	<<bundle //show snippet
		<<local (snippet = getSnippet (s, "radio copyright"))
		<<dialog.notify (snippet)
	<<bundle //benchmark
		<<local (ticks = clock.ticks ())
		<<for i = 1 to 10
			<<getSnippet (s, "radio copyright")
		<<dialog.notify ((clock.ticks () - ticks) / 60.0)
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.