#!/usr/bin/perl
#
# HT://DIGALIZER
#
# Author:
#   Nathan Hand
#
# History:
#   2001 June 4th - First Release, No Version Number
#
# Usage:
#   cat /var/log/htsearch.log | htdigalizer | webalizer -
#
# Description:
#   Converts htsearch logs (enabled in htdig.conf with logging:yes) into
#   Extended Common Log Format. Loses lots of information in the process
#   and CLF isn't an appropriate log format. Useful because there are no
#   htsearch specific graphing utilities, so with this script you'll get
#   something up quickly using webalizer.
#
# Bugs:
#   The labels generated by webalizer will be misleading.
#   The graphs and tables are mostly inappropriate.
#
# Future:
#   This program has no future! It's a stopgap until someone (preferably
#   not me) writes a proper htsearch specific graphing utility.
#

use strict;
use warnings;

use Time::Local;
use POSIX qw(strftime);

# Syslog month abbreviation => zero-based month number, which is what
# timelocal() expects in its month argument.
my %MONTH_NUMBER_OF = (
    Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4,  Jun => 5,
    Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11,
);

# A timestamp more than this many seconds ahead of "now" is taken to belong
# to the previous year (syslog lines carry no year of their own).
my $FUTURE_SLACK_SECONDS = 24 * 60 * 60;

# parse_syslog($logline)
#
# Splits one syslog line of the form
#   "Mon DD HH:MM:SS host process: message"
# into its parts.
#
# Returns the list ($time, $host, $process, $htlog), where $time is an epoch
# value computed in the local time zone and $htlog is the message text after
# "process: ".  Returns the empty list when the line does not look like a
# syslog line, names an unknown month, or carries an impossible date.
sub parse_syslog {
    my ($logline) = @_;

    return unless defined $logline;
    return
        unless $logline
        =~ /(\S+)\s+(\d+)\s+(\d\d):(\d\d):(\d\d)\s+(\S+)\s+([^:]+):\s(.*)/;

    # Copy the captures straight away; any later regex would clobber them.
    my ($month_name, $day, $hour, $min, $sec, $host, $process, $htlog)
        = ($1, $2, $3, $4, $5, $6, $7, $8);

    my $month = $MONTH_NUMBER_OF{$month_name};
    return unless defined $month;

    my $now          = time;
    my $current_year = (localtime $now)[5] + 1900;

    # The log line has no year.  Try the current year first; if that is
    # invalid (e.g. Feb 29) or lies in the future (a December entry read in
    # January), fall back to the previous year.
    for my $year ($current_year, $current_year - 1) {
        # timelocal() dies on out-of-range fields, hence the eval.
        my $time = eval { timelocal($sec, $min, $hour, $day, $month, $year) };
        next unless defined $time;
        next if $time > $now + $FUTURE_SLACK_SECONDS;
        return ($time, $host, $process, $htlog);
    }

    return;
}

# parse_htlog($logline)
#
# Parses the htsearch message part of a log line, which has the shape
#   remote [config] (method) [words] [logic] (matches/perpage) - page -- referrer
#
# Returns the list ($remote, $config, $method, $words, $logic, $matches,
# $perpage, $curpage, $referrer), or the empty list when the text does not
# match that shape.
sub parse_htlog {
    my ($logline) = @_;

    return unless defined $logline;
    return
        unless $logline
        =~ /(\S+) \[([^\]]*)\] \(([^\)]*)\) \[([^\]]*)\] \[([^\]]*)\] \((\d+)\/(\d+)\) - (\d+) -- (.*)/;

    return ($1, $2, $3, $4, $5, $6, $7, $8, $9);
}

# buildclf($time, $host, $remote, $words, $matches, $referrer)
#
# Formats one search as an Extended Common Log Format line (no trailing
# newline):
#   - the search words become the "GET" request,
#   - zero matches are reported as status 404, anything else as 200,
#   - the match count sits where CLF normally carries the byte count,
#   - the logging host is reported through the user-agent field.
#
# Characters that would break the quoted CLF fields are percent-encoded.
sub buildclf {
    my ($time, $host, $remote, $words, $matches, $referrer) = @_;

    $words    =~ s/(["%\\])/sprintf "%%%x", ord($1)/eg;
    $referrer =~ s/(["\\])/sprintf "%%%x", ord($1)/eg;

    # %z prints the local UTC offset, so the broken-down time must be local
    # time as well; pairing it with gmtime() would mislabel the timestamp.
    my $datetime = strftime "[%d/%b/%Y:%H:%M:%S %z]", localtime($time);

    my $status   = ($matches == 0) ? "404" : "200";
    my $rfc931   = "-";
    my $authuser = "-";

    my $request   = qq{"GET $words"};
    my $quoted_referrer  = qq{"$referrer"};
    my $useragent = qq{"ht://dig from $host"};

    return "$remote $rfc931 $authuser $datetime $request $status $matches $quoted_referrer $useragent";
}

# main()
#
# Reads syslog lines from standard input and prints one CLF line per
# htsearch entry.  Lines that cannot be parsed are skipped silently.
sub main {
    while (my $line = <STDIN>) {
        my @syslog = parse_syslog($line) or next;
        my ($time, $host, $htlog) = @syslog[0, 1, 3];

        my @search = parse_htlog($htlog) or next;
        my ($remote, $words, $matches, $referrer) = @search[0, 3, 5, 8];

        print buildclf($time, $host, $remote, $words, $matches, $referrer), "\n";
    }
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without consuming standard input.
main() unless caller;