Monday, November 08, 2010 at 12:04 AM.
system.verbs.builtins.mainResponder.search.server.getSnippet
on getSnippet (htmltext, words, flHtmlSnippets=true) {
	<<Changes
		<<8/7/02; 7:15:42 PM by JES
			<<Created. Take some html text and search terms and return a snippet of sample text from the page which contains the search terms, ala Google's snippets.
	<<Notes
		<<htmltext is the html of the page; words is the space-separated search string (double quotes are removed from it).
		<<Snippets are built for the first three search terms only and concatenated. A snippet whose first 30 characters already appear in the result is not added again.
		<<When flHtmlSnippets is true, every search term and every ellipsis in the result is wrapped in <b> tags.
		<<A snippet that begins at the very start of the text keeps its first word and gets no leading ellipsis. Previously the first word was dropped in that case, which could remove the search term itself.
		<<Empty search terms (e.g. from consecutive spaces in words) and terms that are not found contribute nothing to the result.
	words = string.replaceAll (words, "\"", "");
	<<NOTE(review): both string arguments on the next line look identical in this listing; presumably the first was a non-breaking space -- confirm against the original script.
	htmltext = string.replaceAll (htmltext, " ", " ");
	local (lowertext = string.lower (htmltext));
	bundle { //strip everything before <body>, if present
		local (ixbody);
		ixbody = string.patternMatch ("<body", lowertext);
		if ixbody > 0 {
			htmltext = string.delete (htmltext, 1, ixbody - 1);
			lowertext = string.delete (lowertext, 1, ixbody - 1)}};
	<<htmltext and lowertext are stripped the same way so that an index found in lowertext can be used in htmltext.
	htmltext = searchEngine.stripMarkup (htmltext);
	lowertext = searchEngine.stripMarkup (lowertext);
	local (flFirstSnippet = true);
	on getOneSnippet (word) {
		<<Return a snippet around the first case-insensitive occurrence of word, or "" if word is empty or not found.
		if sizeOf (word) == 0 { //nothing to look for
			return ("")};
		local (lowerword = string.lower (word));
		local (ixword = string.patternMatch (lowerword, lowertext));
		if ixword > 0 {
			local (i, ix = ixword, flStartEllipses = true);
			for i = 1 to 50 { //try to find the start of the sentence
				if ix == 1 { //reached the start of the text: nothing was cut off, so the first word is whole
					flStartEllipses = false;
					break};
				case htmltext[ix] {
					'.';
					'!';
					'?';
					':';
					')';
					'\'';
					'\"';
					']' { //sentence boundary: start just after it
						ix++;
						flStartEllipses = false;
						break}};
				ix--};
			<<the window is 75 characters plus the loop counter reached during the backward scan
			local (s = string.trimWhiteSpace (string.mid (htmltext, ix, 75 + i)));
			if flStartEllipses { //we started mid-sentence, so the first word may be a fragment: drop it
				local (firstword = string.nthField (s, " ", 1));
				s = string.delete (s, 1, sizeOf (firstword))};
			<<the last word may have been cut off by the window, so it is always dropped
			local (lastword = string.nthField (s, " ", string.countFields (s, " ")));
			s = string.trimWhiteSpace (string.mid (s, 1, sizeOf (s) - sizeOf (lastword)));
			if flStartEllipses and flFirstSnippet { //only the first snippet gets a leading ellipsis
				s = "... " + s};
			flFirstSnippet = false;
			s = s + " ...";
			return (s)}
		else {
			return ("")}};
	local (snippets = "", snippet);
	local (ctwords = string.countFields (words, " "));
	local (i);
	for i = 1 to ctwords { //get snippets for the first 3 search terms
		if i > 3 { //only do first three search terms
			break};
		snippet = getOneSnippet (string.nthField (words, " ", i));
		if snippet != "" { //terms that were not found must not add a stray space
			snippet = snippet + " ";
			if not (snippets contains string.mid (snippet, 1, 30)) {
				snippets = snippets + snippet}}};
	if flHtmlSnippets { //embolden search terms
		<<The tags are first inserted into a lowercased copy (so matching is case-insensitive) and then copied into snippets at the same positions, which keeps the original capitalization.
		local (lowersnippet = string.lower (snippets));
		for i = 1 to ctwords {
			local (lowerword = string.lower (string.nthField (words, " ", i)));
			if sizeOf (lowerword) > 0 { //never search-and-replace an empty pattern
				lowersnippet = string.replaceAll (lowersnippet, lowerword, "<b>" + lowerword + "</b>")}};
		local (ct = sizeOf (lowersnippet) - 3);
		for i = 1 to ct {
			if lowersnippet[i] == '<' {
				if lowersnippet[i+1] == 'b' { //opening tag
					snippets = string.insert ("<b>", snippets, i)}
				else { //closing tag
					snippets = string.insert ("</b>", snippets, i)}}}};
	if flHtmlSnippets { //make ellipses more visible
		snippets = string.replaceAll (snippets + " ", "... ", "<b>...</b> ")};
	return (string.popTrailing (snippets, " "))}
<<bundle //test code
<<bundle //show snippet
<<local (url = "http://127.0.0.1:8080/users/0000003/2002/07/11.html")
<<local (snippet = getSnippet (pageinfo.[url].text, "john"))
<<dialog.notify (snippet)
<<local (folder = file.folderFromPath (frontier.getProgramPath ()))
<<local (s = file.readWholeFile (folder + "Guest Databases:www:users:0000003:index.html"))
<<bundle //show snippet
<<local (snippet = getSnippet (s, "radio copyright"))
<<dialog.notify (snippet)
<<bundle //benchmark
<<local (ticks = clock.ticks ())
<<for i = 1 to 10
<<getSnippet (s, "radio copyright")
<<dialog.notify ((clock.ticks () - ticks) / 60.0)
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.