Monday, November 08, 2010 at 12:05 AM.
system.verbs.builtins.searchEngine.indexFolder
on indexFolder (folder, siteName, baseURL, adrStopWords=nil) {
	<<Index all the files in a folder and its subfolders.
		<<folder: path of the folder to walk. It must exist and must be a folder, otherwise a scriptError is raised.
		<<siteName: name used to look up the index and previews tables, and passed to searchEngine.saveIndex when the walk is done.
		<<baseURL: URL that corresponds to folder. It must end with "/", otherwise a scriptError is raised.
		<<adrStopWords: address of the stop-words table handed to searchEngine.indexPage. When nil, @searchEngine.data.stopWords is used.
		<<Returns true once the index has been saved.
	local (pc = file.getPathChar ());
	local (f);
	local (adrIndex = searchEngine.getIndexAddress (siteName));
	local (adrPreviews = searchEngine.getPreviewsAddress (siteName));
	if adrStopWords == nil {
		adrStopWords = @searchEngine.data.stopWords};
	bundle { //ensure that the folder path is valid
		if not (file.exists (folder)) {
			scriptError ("The folder " + folder + " does not exist.")};
		if not (file.isFolder (folder)) {
			scriptError ("The path " + folder + " is not a path to a folder.")}};
	bundle { //ensure that the baseURL is valid
		if not (baseURL endsWith "/") {
			scriptError ("Can't index this folder because the baseURL is not the URL of a directory.")}};
	fileloop (f in folder, infinity) { //loop through the folder, file by file
		local (fileType = string.lower (file.type (f)));
		fileType = string.popTrailing (fileType, ' '); //strip trailing blanks from the type before comparing it
		case true { //watch out for text files that really aren't text files
			string.lower (f) endsWith ".css";
			string.lower (f) endsWith ".hqx" {
				continue}};
		case fileType { //is this a text file? If so, read it and index it.
			"html";
			"htm";
			"text";
			"txt" {
				local (s = string (file.readWholeFile (f)));
				local (title = html.getOneTagValue (s, "title"));
				local (bodyText = html.getOneTagValue (s, "body"));
				local (url);
				bundle { //ensure title and bodyText contain text
					if title == "" {
						title = "No Title"};
					if bodyText == "" { //no body element found, fall back to the whole file
						bodyText = s};
					if bodyText == "" { //the file is empty, there is nothing to index
						continue}};
				bundle { //get the URL of this file
					url = string.delete (f, 1, sizeOf (folder)); //path of the file relative to folder
					if url beginsWith pc { //folder was passed without a trailing path separator. Drop the leftover separator, otherwise the URL would contain a double slash, since baseURL already ends with "/".
						url = string.delete (url, 1, 1)};
					url = string.replaceAll (url, pc, "/");
					url = baseURL + url};
				msg ("Search Engine: Indexing: " + url);
				<<Create a preview for this page.
					<<Always create a preview before indexing the page.
				searchEngine.createPreview (bodyText, title, url, f, adrPreviews, file.modified (f));
				<<Index the page.
				searchEngine.indexPage (f, url, title, bodyText, adrIndex, adrStopWords)}};
		sys.systemTask ()}; //called once per file visited, whether or not it was indexed
	<<Save the guest database containing the index.
	searchEngine.saveIndex (siteName, adrIndex);
	return (true)}
This listing is for code that runs in the OPML Editor environment. I created these listings because I wanted the search engines to index this code, so that when I want to look up something in my codebase I don't have to use the much slower search functionality in my object database. Dave Winer.