Monday, November 08, 2010 at 12:05 AM.
system.verbs.builtins.searchEngine.indexFolder
on indexFolder (folder, siteName, baseURL, adrStopWords=nil) {
	<<Index all the files in a folder and its subfolders.
		<<folder: path to the folder to index. A scriptError is raised if it doesn't exist or isn't a folder.
		<<siteName: name of the site; used to look up the index and previews addresses and to save the index.
		<<baseURL: URL that corresponds to the folder. It must end with "/", otherwise a scriptError is raised.
		<<adrStopWords: address of the stop words passed to searchEngine.indexPage. Defaults to @searchEngine.data.stopWords.
		<<Only files whose type is html, htm, text or txt are indexed; files whose names end with .css or .hqx are always skipped.
		<<Returns true.
	local (pc = file.getPathChar ());
	local (f);
	local (adrIndex = searchEngine.getIndexAddress (siteName));
	local (adrPreviews = searchEngine.getPreviewsAddress (siteName));
	if adrStopWords == nil { //caller didn't supply stop words, use the default
		adrStopWords = @searchEngine.data.stopWords};
	bundle { //ensure that the folder path is valid
		if not (file.exists (folder)) {
			scriptError ("The folder " + folder + " does not exist.")};
		if not (file.isFolder (folder)) {
			scriptError ("The path " + folder + " is not a path to a folder.")}};
	bundle { //ensure that the baseURL is valid
		if not (baseURL endsWith "/") {
			scriptError ("Can't index this folder because the baseURL is not the URL of a directory.")}};
	fileloop (f in folder, infinity) { //loop through the folder, file by file
		local (fileType = string.lower (file.type (f)));
		fileType = string.popTrailing (fileType, ' '); //so a three-character type such as "htm " matches "htm"
		case true { //watch out for text files that really aren't text files
			string.lower (f) endsWith ".css";
			string.lower (f) endsWith ".hqx" {
				continue}};
		case fileType { //is this a text file? If so, read it and index it.
			"html";
			"htm";
			"text";
			"txt" {
				local (s = string (file.readWholeFile (f)));
				local (title = html.getOneTagValue (s, "title"));
				local (bodyText = html.getOneTagValue (s, "body"));
				local (url);
				bundle { //ensure title and bodyText contain text
					if title == "" {
						title = "No Title"};
					if bodyText == "" { //no body tag, fall back to the whole file
						bodyText = s};
					if bodyText == "" { //the file is empty, there's nothing to index
						continue}};
				bundle { //get the URL of this file
					url = string.delete (f, 1, sizeOf (folder)); //path relative to the folder
					url = string.replaceAll (url, pc, "/");
					if url beginsWith "/" { //folder was passed without a trailing path separator; avoid a double slash after baseURL
						url = string.delete (url, 1, 1)};
					url = baseURL + url};
				msg ("Search Engine: Indexing: " + url);
				<<Create a preview for this page.
					<<Always create a preview before indexing the page.
				searchEngine.createPreview (bodyText, title, url, f, adrPreviews, file.modified (f));
				<<Index the page.
				searchEngine.indexPage (f, url, title, bodyText, adrIndex, adrStopWords)}};
		sys.systemTask ()}; //called once per file that wasn't skipped; presumably yields time to the system
	<<Save the guest database containing the index.
	searchEngine.saveIndex (siteName, adrIndex);
	return (true)}
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted search engines to index them, so that when I want to look something up in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.