Monday, November 08, 2010 at 12:04 AM.
system.verbs.builtins.mainResponder.search.server.deIndexPage
on deIndexPage (url, flDialogs=false, indexName=nil) {
	<<Remove a page from the index.
		<<url: name of the page's entry in the index's pageInfo table; also the name of the page's reference under each indexed word.
		<<flDialogs: if true, report the outcome to the user with dialog.alert or dialog.notify.
		<<indexName: passed to mainResponder.search.utilities.getIndexPath to locate the index.
		<<Always returns true, even when the page wasn't in the index.
	<<Changes:
		<<07/19/00; 1:14:58 AM by PBS
			<<Don't use string.nthField -- instead, always get the first word from the content string. Then delete that first word. Repeat until the string is "".
	msg ("Search Engine: de-indexing " + url + "...");
	local (indexPath = mainResponder.search.utilities.getIndexPath (indexName));
	local (adrPageInfo = @[indexPath].pageInfo.[url]);
	if defined (adrPageInfo^) { //this page was previously indexed
		local (content = nil); //stays nil unless saved text is found for the page
		if defined (adrPageInfo^.content) and adrPageInfo^.content != "" {
			content = string (adrPageInfo^.content)}
		else {
			if defined (adrPageInfo^.text) {
				content = adrPageInfo^.text}};
		if content == nil { //the text hasn't been saved, use brute force
			searchEngine.deIndexPage (url, @[indexPath].index)}
		else { //smart de-indexing
			while sizeOf (content) > 0 { //PBS 07/18/00: get the leading word, then delete it from the content string
				content = string.trimWhiteSpace (content);
				if sizeOf (content) == 0 { //nothing but whitespace was left
					break};
				local (oneWord = string.nthField (content, ' ', 1));
				local (wordSize = sizeOf (oneWord)); //raw length, measured before normalizing, so the right number of characters is deleted below
				oneWord = string.dropNonAlphas (oneWord);
				oneWord = string.lower (oneWord);
				oneWord = string.trimWhiteSpace (oneWord);
				oneWord = string.popTrailing (oneWord, 's'); //pop off trailing s's
				if oneWord != "" { //words that normalize to nothing are just skipped
					local (letter = oneWord [1]); //index sub-tables are named by the word's first letter
					local (adrLetter = @[indexPath].index.[letter]);
					if defined (adrLetter^) {
						local (adrWord = @adrLetter^.[oneWord]);
						if defined (adrWord^) { //is this word in the index?
							local (adrRef = @adrWord^.[url]);
							if defined (adrRef^) { //is this page in the index under this word?
								delete (adrRef)};
							if sizeOf (adrWord^) == 0 { //prune the word table once it's empty
								delete (adrWord)};
							if sizeOf (adrLetter^) == 0 { //prune the letter table once it's empty
								delete (adrLetter)}}}};
				content = string.delete (content, 1, wordSize)}}}
	else {
		if flDialogs {
			dialog.alert ("Can't de-index " + url + " because it isn't in the index.")}};
	if defined (adrPageInfo^) {
		msg ("Search Engine: deleting page info for " + url + "...");
		delete (adrPageInfo);
		if flDialogs {
			dialog.notify (url + " has been deleted from the index.")}};
	msg ("");
	return (true)}
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.