diff -cprN ../htdig-3.1.4-with-url_seed_score/htcommon/defaults.cc ./htcommon/defaults.cc
*** ../htdig-3.1.4-with-url_seed_score/htcommon/defaults.cc Sun Jan 30 13:44:57 2000
--- ./htcommon/defaults.cc Sun Jan 30 13:51:35 2000
*************** ConfigDefaults defaults[] =
*** 122,127 ****
--- 122,128 ----
{"search_algorithm", "exact:1"},
{"search_results_footer", "${common_dir}/footer.html"},
{"search_results_header", "${common_dir}/header.html"},
+ {"search_results_order", ""},
{"search_results_wrapper", ""},
{"server_aliases", ""},
{"server_wait_time", "0"},
diff -cprN ../htdig-3.1.4-with-url_seed_score/htdoc/attrs.html ./htdoc/attrs.html
*** ../htdig-3.1.4-with-url_seed_score/htdoc/attrs.html Sun Jan 30 13:44:57 2000
--- ./htdoc/attrs.html Sun Jan 30 12:43:13 2000
***************
*** 5256,5261 ****
--- 5256,5317 ----
-
+
+ search_results_order
+
+ -
+
+ -
+ type:
+
+ -
+ string list
+
+ -
+ used by:
+
+ -
+ htsearch
+
+ -
+ default:
+
+ -
+ <empty>
+
+ -
+ description:
+
+ -
+ This specifies a list of patterns for URLs in
+ search results. Results are grouped by the first
+ pattern their URL matches and the groups are shown
+ in the listed order; within each group the usual
+ search ordering applies. Results matching none of
+ the patterns are placed where * appears in the
+ list. If no * is specified, one is implicitly
+ added at the end of the list.
+ See also url_seed_score.
+
+ -
+ example:
+
+ -
+
+
+ |
+ search_results_order: /docs/|faq.html *
+ /maillist/ /testresults/
+ |
+
+
+
+
+
+
+
+
+ -
search_results_wrapper
***************
*** 6864,6870 ****
point constants.
More straightforward is to think of the format as
"newscore = oldscore*N+M",
! but with the "newscore = oldscore" part left out.
-
example:
--- 6920,6928 ----
point constants.
More straightforward is to think of the format as
"newscore = oldscore*N+M",
! but with the "newscore = oldscore" part left out.
! See also
! search_results_order.
-
example:
diff -cprN ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byname.html ./htdoc/cf_byname.html
*** ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byname.html Sun Jan 30 13:44:57 2000
--- ./htdoc/cf_byname.html Sun Jan 30 12:43:13 2000
***************
*** 142,147 ****
--- 142,148 ----
search_algorithm
search_results_footer
search_results_header
+
search_results_order
search_results_wrapper
server_aliases
server_max_docs
diff -cprN ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byprog.html ./htdoc/cf_byprog.html
*** ../htdig-3.1.4-with-url_seed_score/htdoc/cf_byprog.html Sun Jan 30 13:44:57 2000
--- ./htdoc/cf_byprog.html Sun Jan 30 12:43:13 2000
***************
*** 159,164 ****
--- 159,165 ----
search_algorithm
search_results_footer
search_results_header
+
search_results_order
search_results_wrapper
sort
sort_names
diff -cprN ../htdig-3.1.4-with-url_seed_score/htlib/List.cc ./htlib/List.cc
*** ../htdig-3.1.4-with-url_seed_score/htlib/List.cc Fri Apr 16 20:47:40 1999
--- ./htlib/List.cc Sun Jan 30 12:43:13 2000
*************** List &List::operator=(List &list)
*** 425,427 ****
--- 425,461 ----
}
+ //*********************************************************************
+ // void AppendList(List &list)
+ //   Move every node of 'list' onto the end of this list and leave 'list'
+ //   empty.  Nodes are relinked rather than copied, and this list's own
+ //   iteration cursor (current, current_index) is not modified.
+ void List::AppendList(List &list)
+ {
+ // Nothing to move from an empty list, and appending to ourselves is a no-op.
+ if (list.number == 0 || &list == this)
+ return;
+
+ // A non-null tail means we already hold nodes: join the two chains.
+ if (tail)
+ {
+ // Hook the other list's first node in after our last node.
+ tail->next = list.head;
+ list.head->prev = tail;
+
+ // Count the added nodes and adopt the other list's tail as ours.
+ number += list.number;
+ tail = list.tail;
+ }
+ else // We hold no nodes: take over the other list's chain as it is.
+ {
+ head = list.head;
+ tail = list.tail;
+ number = list.number;
+ }
+
+ // Reset the other list's members so that it is left as an empty list.
+ list.head = list.tail = list.current = 0;
+ list.current_index = -1;
+ list.number = 0;
+ }
diff -cprN ../htdig-3.1.4-with-url_seed_score/htlib/List.h ./htlib/List.h
*** ../htdig-3.1.4-with-url_seed_score/htlib/List.h Mon Feb 3 18:11:04 1997
--- ./htlib/List.h Sun Jan 30 12:43:13 2000
*************** public:
*** 112,117 ****
--- 112,120 ----
List &operator= (List *list) {return *this = *list;}
List &operator= (List &list);
+ // Move one list to the end of another, emptying the other list.
+ void AppendList (List &list);
+
protected:
//
// Pointers into the list
diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/Display.cc ./htsearch/Display.cc
*** ../htdig-3.1.4-with-url_seed_score/htsearch/Display.cc Sun Jan 30 13:44:57 2000
--- ./htsearch/Display.cc Sun Jan 30 13:10:27 2000
*************** static char RCSid[] = "$Id: Display.cc,v
*** 25,30 ****
--- 25,31 ----
#include "HtURLCodec.h"
#include "HtWordType.h"
#include "HtURLSeedScore.h"
+ #include "SplitMatches.h"
//*****************************************************************************
//
*************** Display::buildMatchList()
*** 938,944 ****
char *id;
String coded_url, url;
ResultMatch *thisMatch;
! List *matches = new List();
double backlink_factor = config.Double("backlink_factor");
double date_factor = config.Double("date_factor");
SortType typ = sortType();
--- 939,945 ----
char *id;
String coded_url, url;
ResultMatch *thisMatch;
! SplitMatches matches(config);
double backlink_factor = config.Double("backlink_factor");
double date_factor = config.Double("date_factor");
SortType typ = sortType();
*************** Display::buildMatchList()
*** 1029,1048 ****
//
// Append this match to our list of matches.
//
! matches->Add(thisMatch);
! if (matches->Count() == 1 || maxScore < score)
maxScore = score;
! if (matches->Count() == 1 || minScore > score)
minScore = score;
}
//
! // The matches need to be ordered by relevance level.
! // Sort it.
//
! sort(matches);
! return matches;
}
//*****************************************************************************
--- 1030,1054 ----
//
// Append this match to our list of matches.
//
! matches.Add(thisMatch, url.get());
!
! if (maxScore < score)
maxScore = score;
! if (minScore > score)
minScore = score;
}
//
! // Each sub-area is then sorted by relevance level.
//
! List *matches_part; // Outside of loop to keep for-scope warnings away.
! for (matches_part = matches.Get_First();
! matches_part != 0;
! matches_part = matches.Get_Next())
! sort(matches_part);
! // Then all sub-lists are concatenated and put in a new list.
! return matches.JoinedLists();
}
//*****************************************************************************
diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/Makefile.in ./htsearch/Makefile.in
*** ../htdig-3.1.4-with-url_seed_score/htsearch/Makefile.in Fri Apr 16 20:47:50 1999
--- ./htsearch/Makefile.in Sun Jan 30 12:43:13 2000
*************** include $(top_builddir)/Makefile.config
*** 9,15 ****
OBJS= Display.o DocMatch.o ResultList.o ResultMatch.o \
Template.o TemplateList.o WeightWord.o htsearch.o \
! parser.o
FOBJS= $(top_builddir)/htfuzzy/libfuzzy.a
TARGET= htsearch
--- 9,15 ----
OBJS= Display.o DocMatch.o ResultList.o ResultMatch.o \
Template.o TemplateList.o WeightWord.o htsearch.o \
! parser.o SplitMatches.o
FOBJS= $(top_builddir)/htfuzzy/libfuzzy.a
TARGET= htsearch
diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.cc ./htsearch/SplitMatches.cc
*** ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.cc Thu Jan 1 01:00:00 1970
--- ./htsearch/SplitMatches.cc Sun Jan 30 12:43:13 2000
***************
*** 0 ****
--- 1,175 ----
+ //
+ // SplitMatches.cc
+ //
+ // SplitMatches:
+ // Holds a list of lists with the matches, as specified in
+ // search_results_order.
+ //
+ // Part of the ht://Dig package
+ // Copyright (c) 2000 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU Public License version 2 or later
+ //
+ //
+ // $Id$
+
+ #include "StringList.h"
+ #include "StringMatch.h"
+ #include "SplitMatches.h"
+ #include
+ #include
+
+ // This class is only used in private members of SplitMatches.
+ // The OO-right thing would be to nest this inside the private
+ // declaration of SplitMatches, but that would cause portability
+ // problems according to
+ // .
+ //
+ // It is used as a container for a key (String) and a list.
+ //
+ class MatchArea : public Object // One URL pattern plus the list filed under it.
+ {
+ public:
+ // Build from a StringMatch pattern string; "*" installs no pattern at all.
+ MatchArea(const String &);
+
+ ~MatchArea();
+
+ // True when a pattern is installed and FindFirst(s) does not return -1.
+ inline bool Match(char *s)
+ { return match.hasPattern() && match.FindFirst(s) != -1; }
+
+ // Pointer to the list embedded in this area (not a copy of it).
+ List *MatchList() { return &myList; }
+
+ private:
+ StringMatch match; // Pattern tested by Match(); never set for "*".
+ List myList; // Items that were filed under this area.
+
+ // Declared private and deliberately left unimplemented, so that the
+ // compiler does not generate them and accidental default construction,
+ // copying or assignment cannot slip in unnoticed.
+ MatchArea();
+ MatchArea(const MatchArea &);
+ void operator= (const MatchArea &);
+ };
+
+ MatchArea::MatchArea(const String &url_regex) // url_regex: pattern text, or "*".
+ {
+ // "*" is the catch-the-rest marker rather than a real pattern.  It is
+ // never installed, so that Match() is meant to reject every URL here.
+ if (strcmp("*", url_regex.get()) != 0)
+ match.Pattern(url_regex.get());
+ }
+
+ MatchArea::~MatchArea() // No explicit work; 'match' and 'myList' are value members.
+ {
+ }
+
+ SplitMatches::SplitMatches(Configuration &config) // Reads "search_results_order".
+ {
+ char *config_item = "search_results_order"; // Name of the attribute to read.
+
+ StringList sl(config[config_item], "\t \r\n"); // Split on tab, space, CR and LF.
+
+ mySubAreas = new List(); // Will hold one MatchArea per pattern.
+ myDefaultList = 0; // Stays 0 until a "*" entry has been seen.
+
+ // One MatchArea per pattern, in order (cf. TemplateList::createFromString).
+ for (int i = 0; i < sl.Count(); i++)
+ {
+ String sub_area_pattern = sl[i];
+ MatchArea *match_item = new MatchArea(sub_area_pattern);
+ mySubAreas->Add(match_item);
+
+ // "*" is the catch-the-rest sub-area: remember its list so that
+ // Add() can file every URL matching no other pattern there.  If
+ // "*" is given more than once, the last occurrence wins.
+ // The "*" area stays in mySubAreas, so lookups also visit one
+ // entry that is known not to match; that is a small price for
+ // keeping the code simple.
+ if (strcmp("*", sub_area_pattern.get()) == 0)
+ myDefaultList = match_item->MatchList();
+ }
+
+ // No explicit "*" in the configuration: append an implicit one, which
+ // places the unmatched results after all configured sub-areas.
+ if (myDefaultList == 0)
+ {
+ MatchArea *match_item = new MatchArea(String("*"));
+ mySubAreas->Add(match_item);
+
+ myDefaultList = match_item->MatchList();
+ }
+ }
+
+ SplitMatches::~SplitMatches()
+ {
+ // myDefaultList points at the list inside one of the MatchArea items
+ // held by mySubAreas, so it must not be deleted separately here.
+
+ delete mySubAreas; // NOTE(review): assumes List's destructor frees its items -- confirm.
+ }
+
+ void // File 'match' under the first sub-area whose pattern matches 'url'.
+ SplitMatches::Add(ResultMatch *match, char *url)
+ {
+ List *area_list = mySubAreas;
+ MatchArea *area_item;
+
+ area_list->Start_Get(); // Rewinds the cursor that Get_First/Get_Next also use.
+
+ // Sub-areas are tried one by one, in configuration order.  A linear
+ // scan is fine here: a list with tens of areas already seems a lot,
+ // and break-even with a more clever search scheme is probably in
+ // the hundreds.
+ while ((area_item = (MatchArea *) area_list->Get_Next()))
+ {
+ // Only the first matching sub-area receives the item.
+ if (area_item->Match(url))
+ {
+ area_item->MatchList()->Add(match);
+ return;
+ }
+ }
+
+ // No sub-area pattern matched: the item goes on the catch-the-rest
+ // list, i.e. the list of the "*" sub-area.
+ myDefaultList->Add(match);
+ }
+
+ // Advance the mySubAreas cursor; return that area's list, or 0 at the end.
+ List *
+ SplitMatches::Get_Next()
+ {
+ MatchArea *next_area = (MatchArea *) mySubAreas->Get_Next();
+ List *next_area_list = 0; // Result when there is no further sub-area.
+
+ if (next_area != 0)
+ next_area_list = next_area->MatchList();
+
+ return next_area_list;
+ }
+
+ // Concatenate all sub-area lists, in sub-area order, into one new list.
+ List *
+ SplitMatches::JoinedLists()
+ {
+ // The result is a freshly allocated List handed to the caller; it is
+ // not deleted here.  The per-area lists are emptied as their contents
+ // move over, and the mySubAreas cursor is rewound and consumed.
+ List *all_areas = new List();
+ List *sub_areas = mySubAreas;
+ MatchArea *area;
+
+ sub_areas->Start_Get();
+
+ while ((area = (MatchArea *) sub_areas->Get_Next())) // Doubled (): intended assignment.
+ {
+ // "Destructively" move the contents of the list,
+ // leaving the original list empty.
+ all_areas->AppendList(*(area->MatchList()));
+ }
+
+ return all_areas;
+ }
diff -cprN ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.h ./htsearch/SplitMatches.h
*** ../htdig-3.1.4-with-url_seed_score/htsearch/SplitMatches.h Thu Jan 1 01:00:00 1970
--- ./htsearch/SplitMatches.h Sun Jan 30 12:43:13 2000
***************
*** 0 ****
--- 1,53 ----
+ //
+ // SplitMatches.h
+ //
+ // SplitMatches: Constructed from a Configuration, see doc
+ // for format of config item "search_results_order".
+ // Used to contain a number of ResultMatches, putting them in separate
+ // lists depending on the URL with method Add.
+ // Iterator methods Get_First and Get_Next return the sub-lists.
+ // Method JoinedLists returns a new list with all the sub-lists
+ // concatenated.
+ //
+ // $Id$
+ //
+ // Part of the ht://Dig package
+ // Copyright (c) 2000 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU Public License version 2 or later
+ //
+ //
+ #ifndef _splitmatches_h
+ #define _splitmatches_h
+
+ #include "Configuration.h"
+ #include "ResultMatch.h"
+ #include "List.h"
+
+ class SplitMatches // Sorts result matches into per-URL-pattern sub-lists.
+ {
+ public:
+ SplitMatches(Configuration &); // Reads "search_results_order" from the config.
+ ~SplitMatches();
+
+ void Add(ResultMatch *, char *); // File a match by its URL (first pattern wins).
+ List *JoinedLists(); // New list with all sub-lists moved into it, in order.
+ List *Get_First() // Rewind the sub-area cursor and return the first sub-list.
+ { mySubAreas->Start_Get(); return Get_Next(); }
+
+ List *Get_Next(); // Next sub-list, or 0 when there are no more.
+
+ private:
+ // Declared only to block default construction, copying and assignment.
+ SplitMatches();
+ SplitMatches(const SplitMatches &);
+ void operator= (const SplitMatches &);
+
+ // List of MatchArea objects, one per sub-area pattern, in order.
+ List *mySubAreas;
+
+ // List of the "*" sub-area: receives matches no pattern claimed.
+ List *myDefaultList;
+ };
+
+ #endif /* _splitmatches_h */