Monday, November 08, 2010 at 12:04 AM.

system.verbs.builtins.mainResponder.search.server.deIndexPage

on deIndexPage (url, flDialogs=false, indexName=nil) {
	<<Remove a page from the index.
		<<Parameters:
			<<url -- the page to remove; used as the key in the index's pageInfo table and in each word's table of references.
			<<flDialogs -- if true, report the outcome with dialog.alert (page not in the index) or dialog.notify (page deleted).
			<<indexName -- passed through to mainResponder.search.utilities.getIndexPath to locate the index.
		<<Returns true in every case, including when the page wasn't in the index.
		<<How it works:
			<<If the page's saved text is available (pageInfo content, or else text), walk it word by word and delete this url from each word's entry, pruning word and letter tables that become empty.
			<<If no text was saved, fall back to searchEngine.deIndexPage, which is handed the whole index table.
		<<Changes:
			<<07/19/00; 1:14:58 AM by PBS
				<<Don't use string.nthField -- instead, always get the first word from the content string. Then delete that first word. Repeat until the string is "".
		<<NOTE(review): an earlier, since-removed version of the word loop called thread.sleepFor (0) every 500 words; the current loop does not yield. Confirm that is still intended for very large pages.
	
	msg ("Search Engine: de-indexing " + url + "...");
	
	local (indexPath = mainResponder.search.utilities.getIndexPath (indexName));
	local (adrPageInfo = @[indexPath].pageInfo.[url]);
	
	if defined (adrPageInfo^) { //this page was previously indexed
		local (content);
		
		if defined (adrPageInfo^.content) and adrPageInfo^.content != "" {
			content = string (adrPageInfo^.content)}
		else {
			if defined (adrPageInfo^.text) {
				content = adrPageInfo^.text}};
		
		if content == nil { //the text hasn't been saved, use brute force
			searchEngine.deIndexPage (url, @[indexPath].index)}
		else { //smart de-indexing
			while sizeOf (content) > 0 { //PBS 07/18/00: get the leading word, then delete it from the content string
				content = string.trimWhiteSpace (content);
				if sizeOf (content) == 0 { //nothing but whitespace was left
					break};
				
				local (oneWord = string.nthField (content, ' ', 1));
				content = string.delete (content, 1, sizeOf (oneWord)); //consume the raw word before it gets normalized
				
				<<Normalize the word before looking it up in the index.
				oneWord = string.dropNonAlphas (oneWord);
				oneWord = string.lower (oneWord);
				oneWord = string.trimWhiteSpace (oneWord);
				oneWord = string.popTrailing (oneWord, 's'); //pop off trailing s's
				
				if sizeOf (oneWord) > 0 { //skip words that normalize to nothing
					local (letter = oneWord [1]); //the index is keyed by first letter, then by word
					local (adrLetter = @[indexPath].index.[letter]);
					if defined (adrLetter^) {
						local (adrWord = @adrLetter^.[oneWord]);
						if defined (adrWord^) { //is this word in the index?
							local (adrRef = @adrWord^.[url]);
							if defined (adrRef^) { //is this page in the index under this word?
								delete (adrRef)};
							if sizeOf (adrWord^) == 0 { //no pages left under this word
								delete (adrWord)};
							if sizeOf (adrLetter^) == 0 { //no words left under this letter
								delete (adrLetter)}}}}}}}
	else {
		if flDialogs {
			dialog.alert ("Can't de-index " + url + " because it isn't in the index.")}};
	
	if defined (adrPageInfo^) {
		msg ("Search Engine: deleting page info for " + url + "...");
		delete (adrPageInfo);
		if flDialogs {
			dialog.notify (url + " has been deleted from the index.")}};
	
	msg ("");
	
	return (true)}



This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index them, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.