From greyleaf@yggdrasill.net Sun Mar 5 17:40:03 2000 Date: Sat, 4 Mar 2000 15:35:16 -0700 (MST) From: Jim Cole To: htdig3-dev@htdig.org Subject: [htdig3-dev] Multiple Excerpt Patch Hi - Quite a while back I offered to work on a patch that would support multiple excerpts, which was a feature requested by a couple of users. Alas, after much procrastinating, I have a first cut at it ;) For lack of a better idea, I used max_excerpts as the name for the configuration attribute. If this attribute is not set, or set to one, my additions aren't even touched, except for the attribute code itself and a conditional. So in "theory", it *can't* break anything using a current configuration file, regardless of anything stupid I might have otherwise done. The code could probably be smarter about handling cases where terms occur multiple times within the excerpt_length. But I was concerned about burning too much time hunting for the additional occurrences. I think it would also sort of violate the excerpt_length attribute. There is a running copy of the patched code at http://www.tngenweb.org/ss/tngenwebss.html which is configured for a maximum of three excerpts. This is my first attempt to even touch the HtDig code, so be gentle ;) Please let me know if you see any problems or have any suggestions. 
Jim Cole *** htdig-3.1.5/htsearch/Display.cc.orig Thu Mar 2 18:24:06 2000 --- htdig-3.1.5/htsearch/Display.cc Sat Mar 4 14:58:49 2000 *************** Display::excerpt(DocumentRef *ref, Strin *** 1148,1153 **** --- 1148,1154 ---- } } else + if ( first == 0 || config.Value( "max_excerpts" ) == 1 ) { int headLength = strlen(head); int length = config.Value("excerpt_length", 50); *************** Display::excerpt(DocumentRef *ref, Strin *** 1190,1196 **** --- 1191,1297 ---- *text << config["end_ellipses"]; } } + else + { + *text = buildExcerpts( head, urlanchor, fanchor ); + } + return text; + } + + //***************************************************************************** + // Handle cases where multiple document excerpts are requested. + // + const String + Display::buildExcerpts( char *head, String urlanchor, int fanchor ) + { + if ( !config.Boolean( "add_anchors_to_excerpt" ) ) + { + fanchor = 0; + } + + int headLength = strlen( head ); + int excerptNum = config.Value( "max_excerpts", 1 ); + int excerptLength = config.Value( "excerpt_length", 50 ); + int lastPos = 0; + int curPos = 0; + + String text; + + for ( int i = 0; i < excerptNum; ++i ) + { + int which, termLength; + + int nextPos = allWordsPattern->FindFirstWord( head + lastPos, + which, termLength ); + + if ( nextPos < 0 ) + { + // Ran out of matching terms + break; + } + else + { + // Determine offset from beginning of head + curPos = lastPos + nextPos; + } + + // Slip a break in since there is another excerpt coming + if ( i != 0 ) + { + text << "
\n"; + } + + // Determine where excerpt starts + char *start = &head[curPos] - excerptLength / 2; + + if ( start < head ) + { + start = head; + } + else + { + text << config["start_ellipses"]; + + while ( *start && HtIsStrictWordChar( *start ) ) + { + start++; + } + } + + // Determine where excerpt ends + char *end = start + excerptLength; + + if ( end > head + headLength ) + { + end = head + headLength; + + text << hilight( start, urlanchor, fanchor ); + } + else + { + while ( *end && HtIsStrictWordChar( *end ) ) + { + end++; + } + + // Save end char so that it can be restored + char endChar = *end; + + *end = '\0'; + + text << hilight(start, urlanchor, fanchor); + text << config["end_ellipses"]; + + *end = endChar; + } + + // No more words left to examine in head + if ( (lastPos = curPos + termLength) > headLength ) + break; + } + + return text; } //***************************************************************************** *** htdig-3.1.5/htsearch/Display.h.orig Thu Mar 2 18:24:13 2000 --- htdig-3.1.5/htsearch/Display.h Sat Mar 4 10:00:39 2000 *************** protected: *** 159,164 **** --- 159,165 ---- void expandVariables(char *); void outputVariable(char *); String *excerpt(DocumentRef *ref, String urlanchor, int fanchor, int &first); + const String buildExcerpts( char *head, String urlanchor, int fanchor ); char *hilight(char *str, String urlanchor, int fanchor); void setupTemplates(); void setupImages(); *** htdig-3.1.5/htcommon/defaults.cc.orig Sat Mar 4 10:10:22 2000 --- htdig-3.1.5/htcommon/defaults.cc Sat Mar 4 10:10:14 2000 *************** ConfigDefaults defaults[] = *** 87,92 **** --- 87,93 ---- {"max_description_length", "60"}, {"max_descriptions", "5"}, {"max_doc_size", "100000"}, + {"max_excerpts", "1" }, {"max_head_length", "512"}, {"max_hop_count", "999999"}, {"max_keywords", "-1"},