// Patch against htdig-3.1.3 (recursive unified diff, `diff -ru`), as far as
// is visible in this file. Summary of the changes, per file:
//
//   htcommon/DocumentDB.cc  Two loops over dbf->Get_Next() now accept keys
//                           that start with "file:" in addition to "http:".
//   htdig/HTML.cc           The "noindex" and "/noindex" tag cases, and the
//                           check for "noindex" in the content string, are
//                           skipped when config.Boolean("ignore_noindex") is
//                           true.
//   htdig/Retriever.cc,.h   The String*-returning IsLocal()/IsLocalUser() are
//                           renamed GetLocal()/GetLocalUser(). A new public
//                           int IsLocal(char *url) tries GetLocalUser(), then
//                           GetLocal(), deletes the returned String and
//                           reports whether one was returned. The URL check
//                           also accepts "file://", and the server->push()
//                           calls pass IsLocal(url.get()) as a fourth
//                           argument.
//   htdig/Server.cc,.h      push() gains `bool local = false`; the early
//                           return for _bad_server is not taken when local
//                           is true.
//   htdig/htdig.cc          If av[optind] is "-", Strings are read from cin
//                           and each one is passed to
//                           retriever.Initial(str, 1), after the configured
//                           start_url has been passed.
//   htlib/String.cc,        Adds `wptr += result;` after `left -= result;`
//   htlib/htString.h        in a loop near line 204, and adds
//                           operator>>(istream &, String &): it sets
//                           s.Length to 0, then appends characters up to and
//                           including '\n', or until EOF or the stream width
//                           is used up, and sets failbit when s ends up
//                           empty.
//   htlib/cgi.cc,.h         Adds cgi::put(name, value), which adds a
//                           new String(value) to `pairs` under `name`.
//   htsearch/Display.cc     The clamp of nPages to "maximum_pages" moves from
//                           the hunk near line 498 to the hunk near line 377,
//                           ahead of the vars.Add() calls.
//   htsearch/htsearch.cc    alarm(5 * 60) and `cgi input;` move ahead of the
//                           getopt() loop; a new -a option splits optarg at
//                           '=' and calls input.put(name, value).
//
// NOTE(review): the line structure of this copy looks collapsed (many diff
// lines share one physical line) and some `<...>` tokens appear to be missing
// (e.g. `#include` with no header name, `static_cast (ch)`). Presumably it
// cannot be fed to patch(1) as is -- confirm against the original patch.
diff -ru htdig-3.1.3/htcommon/DocumentDB.cc htdig-3.1.3.patched/htcommon/DocumentDB.cc --- htdig-3.1.3/htcommon/DocumentDB.cc Thu Apr 22 06:47:57 1999 +++ htdig-3.1.3.patched/htcommon/DocumentDB.cc Sat Nov 20 20:38:12 1999 @@ -217,7 +217,8 @@ while ((key = dbf->Get_Next())) { dbf->Get(key, data); - if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0) + if (strncmp(HtURLCodec::instance()->decode(key), "http:", 5) == 0 || + strncmp(HtURLCodec::instance()->decode(key), "file:", 5) == 0) { ref = new DocumentRef; ref->Deserialize(data); @@ -284,7 +285,8 @@ while ((coded_key = dbf->Get_Next())) { String key = HtURLCodec::instance()->decode(coded_key); - if (mystrncasecmp(key, "http:", 5) == 0) + if (mystrncasecmp(key, "http:", 5) == 0 || + mystrncasecmp(key, "file:", 5) == 0) { DocumentRef *ref = (*this)[key]; if (ref) diff -ru htdig-3.1.3/htdig/HTML.cc htdig-3.1.3.patched/htdig/HTML.cc --- htdig-3.1.3/htdig/HTML.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htdig/HTML.cc Sat Nov 20 20:38:12 1999 @@ -649,13 +649,19 @@ break; case 16: // "noindex" - doindex = 0; - dofollow = 0; + if (!config.Boolean("ignore_noindex")) + { + doindex = 0; + dofollow = 0; + } break; case 17: // "/noindex" - doindex = 1; - dofollow = 1; + if (!config.Boolean("ignore_noindex")) + { + doindex = 1; + dofollow = 1; + } break; case 18: // "img" @@ -868,7 +874,8 @@ { String content_cache = conf["content"]; - if (content_cache.indexOf("noindex") != -1) + if (content_cache.indexOf("noindex") != -1 && + !config.Boolean("ignore_noindex")) { doindex = 0; retriever.got_noindex(); diff -ru htdig-3.1.3/htdig/Retriever.cc htdig-3.1.3.patched/htdig/Retriever.cc --- htdig-3.1.3/htdig/Retriever.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htdig/Retriever.cc Sat Nov 20 20:39:41 1999 @@ -150,7 +150,7 @@ { if (debug > 2) cout << " pushed"; - server->push(u.get(), 0, 0); + server->push(u.get(), 0, 0, IsLocal(url.get())); } if (debug > 2) cout << endl; @@ -395,9 +395,9 @@ // Retrive 
document, first trying local file access if possible. Document::DocStatus status; - String *local_filename = IsLocalUser(url.get()); + String *local_filename = GetLocalUser(url.get()); if (!local_filename) - local_filename = IsLocal(url.get()); + local_filename = GetLocal(url.get()); if (local_filename) { if (debug > 1) @@ -625,7 +625,7 @@ // Currently, we only deal with HTTP URLs. Gopher and ftp will // come later... ***FIX*** // - if (strstr(u, "/../") || strncmp(u, "http://", 7) != 0) + if (strstr(u, "/../") || (strncmp(u, "http://", 7) != 0 && strncmp(u, "file://", 7) != 0)) { if (debug > 2) cout << endl <<" Rejected: Not an http or relative link!"; @@ -688,13 +688,13 @@ //***************************************************************************** -// String* Retriever::IsLocal(char *url) +// String* Retriever::GetLocal(char *url) // Returns a string containing the (possible) local filename // of the given url, or 0 if it's definitely not local. // THE CALLER MUST FREE THE STRING AFTER USE! // String* -Retriever::IsLocal(char *url) +Retriever::GetLocal(char *url) { static StringList *prefixes = 0; static StringList *paths = 0; @@ -747,14 +747,14 @@ //***************************************************************************** -// String* Retriever::IsLocalUser(char *url) +// String* Retriever::GetLocalUser(char *url) // If the URL has ~user part, returns a string containing the // (possible) local filename of the given url, or 0 if it's // definitely not local. // THE CALLER MUST FREE THE STRING AFTER USE! 
// String* -Retriever::IsLocalUser(char *url) +Retriever::GetLocalUser(char *url) { static StringList *prefixes = 0, *paths = 0, *dirs = 0; static Dictionary home_cache; @@ -1085,7 +1085,8 @@ // Let's just be sure we're not pushing an empty URL // if (strlen(url.get())) - server->push(url.get(), ref->DocHopCount(), base->get()); + server->push(url.get(), ref->DocHopCount(), base->get(), + IsLocal(url.get())); String temp = url.get(); visited.Add(temp, 0); @@ -1209,7 +1210,8 @@ server = new Server(url.host(), url.port()); servers.Add(url.signature(), server); } - server->push(url.get(), ref->DocHopCount(), base->get()); + server->push(url.get(), ref->DocHopCount(), base->get(), + IsLocal(url.get())); String temp = url.get(); visited.Add(temp, 0); @@ -1357,3 +1359,16 @@ } } +int +Retriever::IsLocal(char *url) +{ + int ret; + + String *local_filename = GetLocalUser(url); + if (!local_filename) + local_filename = GetLocal(url); + ret = (local_filename != 0); + delete local_filename; + + return ret; +} diff -ru htdig-3.1.3/htdig/Retriever.h htdig-3.1.3.patched/htdig/Retriever.h --- htdig-3.1.3/htdig/Retriever.h Thu Apr 22 06:47:57 1999 +++ htdig-3.1.3.patched/htdig/Retriever.h Sat Nov 20 20:38:12 1999 @@ -64,6 +64,11 @@ // username/password // void setUsernamePassword(char *credentials); + + // + // Check is url local + // + int IsLocal(char *url); private: // @@ -118,8 +123,8 @@ int Need2Get(char *url); DocumentRef * GetRef(char *url); int IsValidURL(char *url); - String * IsLocal(char *url); - String * IsLocalUser(char *url); + String * GetLocal(char *url); + String * GetLocalUser(char *url); void RetrievedDocument(Document &, char *url, DocumentRef *ref); void parse_url(URLRef &urlRef); void got_redirect(char *, DocumentRef *); diff -ru htdig-3.1.3/htdig/Server.cc htdig-3.1.3.patched/htdig/Server.cc --- htdig-3.1.3/htdig/Server.cc Thu Apr 22 06:47:57 1999 +++ htdig-3.1.3.patched/htdig/Server.cc Sat Nov 20 20:38:12 1999 @@ -231,9 +231,9 @@ 
//***************************************************************************** // void Server::push(char *path, int hopcount, char *referer) // -void Server::push(char *path, int hopcount, char *referer) +void Server::push(char *path, int hopcount, char *referer, bool local) { - if (_bad_server) + if (_bad_server && !local) return; // diff -ru htdig-3.1.3/htdig/Server.h htdig-3.1.3.patched/htdig/Server.h --- htdig-3.1.3/htdig/Server.h Thu Apr 22 06:47:57 1999 +++ htdig-3.1.3.patched/htdig/Server.h Sat Nov 20 20:38:12 1999 @@ -56,7 +56,7 @@ // see if the path in the path is allowed. If it isn't allowed, // it simply won't be added. // - void push(char *path, int hopcount, char *referer); + void push(char *path, int hopcount, char *referer, bool local = false); // // Return the next URL from the queue for this server. diff -ru htdig-3.1.3/htdig/htdig.cc htdig-3.1.3.patched/htdig/htdig.cc --- htdig-3.1.3/htdig/htdig.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htdig/htdig.cc Sat Nov 20 20:38:12 1999 @@ -20,6 +20,8 @@ #include #endif +#include + // // Global variables // @@ -245,6 +247,16 @@ if (credentials.length()) retriever.setUsernamePassword(credentials); retriever.Initial(config["start_url"], 1); + + if (optind < ac && !strcmp(av[optind], "-")) + { + String str; + while (cin >> str) + { + str.chop('\n'); + retriever.Initial(str, 1); + } + } // // Go do it! 
diff -ru htdig-3.1.3/htlib/String.cc htdig-3.1.3.patched/htlib/String.cc --- htdig-3.1.3/htlib/String.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htlib/String.cc Sat Nov 20 20:38:12 1999 @@ -204,6 +204,7 @@ return result; left -= result; + wptr += result; } return left; } @@ -544,6 +545,35 @@ return o; } +istream &operator >> (istream &is, String &s) +{ + int w = is.width (0); + if (is.ipfx0 ()) + { + register streambuf *sb = is.rdbuf (); + s.Length = 0; + while (1) + { + int ch = sb->sbumpc (); + if (ch == EOF) + { + is.setstate (ios::eofbit); + break; + } + s << static_cast (ch); + if (ch == '\n') + break; + if (--w == 1) + break; + } + } + + is.isfx (); + if (s.length () == 0) + is.setstate (ios::failbit); + + return is; +} //------------------------------------------------------------------------ // Private Methods. // diff -ru htdig-3.1.3/htlib/cgi.cc htdig-3.1.3.patched/htlib/cgi.cc --- htdig-3.1.3/htlib/cgi.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htlib/cgi.cc Sat Nov 20 20:38:12 1999 @@ -157,6 +157,14 @@ //***************************************************************************** +// void cgi::put(char *name, char *value) +// +void cgi::put(char *name, char *value) +{ + pairs->Add(name, new String(value)); +} + +//***************************************************************************** // int cgi::exists(char *name) // int diff -ru htdig-3.1.3/htlib/cgi.h htdig-3.1.3.patched/htlib/cgi.h --- htdig-3.1.3/htlib/cgi.h Thu Apr 22 06:47:58 1999 +++ htdig-3.1.3.patched/htlib/cgi.h Sat Nov 20 20:38:12 1999 @@ -21,6 +21,7 @@ char *operator [] (char *); char *get(char *); + void put(char *name, char *value); int exists(char *); char *path(); diff -ru htdig-3.1.3/htlib/htString.h htdig-3.1.3.patched/htlib/htString.h --- htdig-3.1.3/htlib/htString.h Thu Apr 22 06:47:58 1999 +++ htdig-3.1.3.patched/htlib/htString.h Sat Nov 20 20:38:12 1999 @@ -9,6 +9,7 @@ #include "Object.h" #include +class istream; class ostream; class String : public Object 
@@ -138,6 +139,7 @@ friend int operator >= (String &a, String &b); friend ostream &operator << (ostream &o, String &s); + friend istream &operator >> (istream &i, String &s); void lowercase(); void uppercase(); diff -ru htdig-3.1.3/htsearch/Display.cc htdig-3.1.3.patched/htsearch/Display.cc --- htdig-3.1.3/htsearch/Display.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htsearch/Display.cc Sat Nov 20 20:38:12 1999 @@ -377,6 +377,8 @@ if (nPages < 1) nPages = 1; // We always have at least one page... + if (nPages > config.Value("maximum_pages", 10)) + nPages = config.Value("maximum_pages"); vars.Add("MATCHES_PER_PAGE", new String(config["matches_per_page"])); vars.Add("MAX_STARS", new String(config["max_stars"])); vars.Add("CONFIG", new String(config["config"])); @@ -498,8 +500,6 @@ char *p; QuotedStringList pnt(config["page_number_text"], " \t\r\n"); QuotedStringList npnt(config["no_page_number_text"], " \t\r\n"); - if (nPages > config.Value("maximum_pages", 10)) - nPages = config.Value("maximum_pages"); for (i = 1; i <= nPages; i++) { if (i == pageNumber) diff -ru htdig-3.1.3/htsearch/htsearch.cc htdig-3.1.3.patched/htsearch/htsearch.cc --- htdig-3.1.3/htsearch/htsearch.cc Sat Nov 20 20:12:55 1999 +++ htdig-3.1.3.patched/htsearch/htsearch.cc Sat Nov 20 20:38:12 1999 @@ -69,10 +69,17 @@ StringList requiredWords; int i; + // + // The total search can NEVER take more than 5 minutes. + // + alarm(5 * 60); + + cgi input; + // // Parse command line arguments // - while ((c = getopt(ac, av, "c:dv")) != -1) + while ((c = getopt(ac, av, "a:c:dv")) != -1) { switch (c) { @@ -80,6 +87,13 @@ configFile = optarg; override_config=1; break; + case 'a': + { + char *name = good_strtok(optarg, '='); + char *value = good_strtok(NULL, 0); + input.put(name, value); + break; + } case 'v': debug++; break; @@ -93,14 +107,8 @@ } // - // The total search can NEVER take more than 5 minutes. - // - alarm(5 * 60); - - // // Parse the CGI parameters. 
// - cgi input; // // Compile the URL limit pattern.