Monday, November 08, 2010 at 12:05 AM.

system.verbs.builtins.searchEngine.indexFolder

on indexFolder (folder, siteName, baseURL, adrStopWords=nil) {
	<<Index all the files in a folder and its subfolders.
		<<Params:
			<<folder -- path of the folder to walk; must exist and must be a folder. A trailing path character is optional.
			<<siteName -- handed to searchEngine.getIndexAddress, searchEngine.getPreviewsAddress and searchEngine.saveIndex.
			<<baseURL -- URL that corresponds to folder; must end with "/".
			<<adrStopWords -- address handed to searchEngine.indexPage; when nil, @searchEngine.data.stopWords is used.
		<<Errors: calls scriptError if folder doesn't exist, isn't a folder, or if baseURL doesn't end with "/".
		<<Only files whose type is html, htm, text or txt are indexed. Files whose names end with .css or .hqx are always skipped.
		<<Returns true.
	local (pc = file.getPathChar ());
	local (f);
	local (folderPrefix = folder); //the part of each file path that gets replaced by baseURL
	
	local (adrIndex = searchEngine.getIndexAddress (siteName));
	local (adrPreviews = searchEngine.getPreviewsAddress (siteName));
	
	if adrStopWords == nil {
		adrStopWords = @searchEngine.data.stopWords};
	
	bundle { //ensure that the folder path is valid
		if not (file.exists (folder)) {
			scriptError ("The folder " + folder + " does not exist.")};
		if not (file.isFolder (folder)) {
			scriptError ("The path " + folder + " is not a path to a folder.")}};
	
	bundle { //ensure that the baseURL is valid
		if not (baseURL endsWith "/") {
			scriptError ("Can't index this folder because the baseURL is not the URL of a directory.")}};
	
	bundle { //ensure that the prefix stripped from each file path ends with the path character
		<<baseURL already ends with "/". If the folder path was passed without a trailing path character, the leftover separator would produce URLs containing "//".
		if not (folderPrefix endsWith pc) {
			folderPrefix = folderPrefix + pc}};
	
	fileloop (f in folder, infinity) { //loop through the folder, file by file
		local (fileType = string.lower (file.type (f)));
		fileType = string.popTrailing (fileType, ' '); //drop trailing spaces from the file type
		
		case true { //watch out for text files that really aren't text files
			string.lower (f) endsWith ".css";
			string.lower (f) endsWith ".hqx" {
				continue}};
		
		case fileType { //is this a text file? If so, read it and index it.
			"html";
			"htm";
			"text";
			"txt" {
				local (s = string (file.readWholeFile (f)));
				local (title = html.getOneTagValue (s, "title"));
				local (bodyText = html.getOneTagValue (s, "body"));
				local (url);
				
				bundle { //ensure title and bodyText contain text
					if title == "" {
						title = "No Title"};
					if bodyText == "" { //no body text was found, fall back to the whole file
						bodyText = s};
					
					if bodyText == "" { //the file is empty, there's nothing to index
						continue}};
				
				bundle { //get the URL of this file
					url = string.delete (f, 1, sizeOf (folderPrefix)); //path relative to the folder, no leading separator
					url = string.replaceAll (url, pc, "/");
					url = baseURL + url};
				
				msg ("Search Engine: Indexing: " + url);
				
				<<Create a preview for this page.
					<<Always create a preview before indexing the page.
				searchEngine.createPreview (bodyText, title, url, f, adrPreviews, file.modified (f));
				
				<<Index the page.
				searchEngine.indexPage (f, url, title, bodyText, adrIndex, adrStopWords)}};
		
		sys.systemTask ()};
	
	<<Save the guest database containing the index.
	searchEngine.saveIndex (siteName, adrIndex);
	
	return (true)}



This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted search engines to index them, so that when I want to look something up in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.