From grdetil@scrc.umanitoba.ca Tue May 2 14:19:29 2000 Date: Tue, 2 May 2000 15:33:49 -0500 (CDT) From: Gilles Detillieux To: htdig@htdig.org Subject: Re: [htdig] patch for Accents fuzzy algorithm for 3.2.0b2 This is an adaptation for 3.2.0b2 of Robert Marchand's latest fix to his accents fuzzy match algorithm. You should be able to apply this patch in the main source directory of the htdig-3.2.0b2 source tree with "patch -p1 < this_file". Robert's fix changed the algorithm to avoid putting the key as a word in the database, resulting in even more database space savings than his earlier writeDB() method (now obsolete). A new getWords() method adds the key to the list of words, so that htsearch will always search for the unaccented word, even if entered with accents. *** htdig-3.2.0b2/htfuzzy/Accents.h.orig Tue Apr 11 17:53:20 2000 --- htdig-3.2.0b2/htfuzzy/Accents.h Tue May 2 12:46:42 2000 *************** public: *** 28,38 **** Accents(const HtConfiguration& config_arg); virtual ~Accents(); - virtual int writeDB(); - virtual void generateKey(char *word, String &key); virtual void addWord(char *word); private: }; --- 28,38 ---- Accents(const HtConfiguration& config_arg); virtual ~Accents(); virtual void generateKey(char *word, String &key); virtual void addWord(char *word); + + virtual void getWords(char *word, List &words); private: }; *** htdig-3.2.0b2/htfuzzy/Accents.cc.orig Tue Apr 11 17:53:20 2000 --- htdig-3.2.0b2/htfuzzy/Accents.cc Tue May 2 12:49:52 2000 *************** Accents::~Accents() *** 85,140 **** } //***************************************************************************** - // int Accents::writeDB() - // - int - Accents::writeDB() - { - String var = name; - var << "_db"; - String filename = config[var]; - - index = Database::getDatabaseInstance(DB_HASH); - if (index->OpenReadWrite(filename, 0664) == NOTOK) - return NOTOK; - - String *s; - char *fuzzyKey; - - int count = 0; - - dict->Start_Get(); - while ((fuzzyKey = dict->Get_Next())) - { - s = (String *) dict->Find(fuzzyKey); - - // Only add if meaningfull list - if (mystrcasecmp(fuzzyKey, s->get()) != 0) { - - index->Put(fuzzyKey, *s); - - if (debug > 1) - { - cout << "htfuzzy: '" << fuzzyKey << "' ==> '" << s->get() << "'\n" - ; - } - count++; - if ((count % 100) == 0 && debug == 1) - { - cout << "htfuzzy: keys: " << count << '\n'; - cout.flush(); - } - } - } - if (debug == 1) - { - cout << "htfuzzy:Total keys: " << count << "\n"; - } - return OK; - } - - - //***************************************************************************** // void Accents::generateKey(char *word, String &key) // void --- 85,90 ---- *************** Accents::addWord(char *word) *** 170,175 **** --- 120,129 ---- String key; generateKey(word, key); + // Do not add fuzzy key as a word, will be added at search time. + if (mystrcasecmp(word, key.get()) == 0) + return; + String *s = (String *) dict->Find(key); if (s) { *************** Accents::addWord(char *word) *** 182,184 **** --- 136,157 ---- } } + + //***************************************************************************** + // void Accents::getWords(char *word, List &words) + // + void + Accents::getWords(char *word, List &words) + { + + if (!word || !*word) + return; + + Fuzzy::getWords(word, words); + + // fuzzy key itself is always searched. + String fuzzyKey; + generateKey(word, fuzzyKey); + if (mystrcasecmp(fuzzyKey.get(), word) != 0) + words.Add(new String(fuzzyKey)); + } -- Gilles R. Detillieux E-mail: Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil Dept. Physiology, U. of Manitoba Phone: (204)789-3766 Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930 ------------------------------------ To unsubscribe from the htdig mailing list, send a message to htdig-unsubscribe@htdig.org You will receive a message to confirm this.