#! /usr/local/bin/perl
#
#  The next line needs to be customized for each site!!
#   $gopher_home - the root directory for your gn server.  If this
#   is not set - the file sizes will not be correct.
#
# Root directory of the gn server's data tree.  It is prepended to the
# file names found in the log when their sizes are looked up with stat().
$gopher_home = "/online/gopher/fedix.data" ;

# When set to 1, a one-line summary record is also appended to the
# running daily log "gopher.day".
$fedix = 1 ;

# The process id of this run; it is the common prefix of every output
# file name, so separate runs do not overwrite each other's files.
$baseid = $$ ;

# Weekday abbreviations, indexed 0 (Sun) through 6 (Sat).
@wk_day_name = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat") ;
#
#  build the output data filenames, then tell the user what they are
#
print "The base ID for output files is = $baseid\n" ;

# Every report file shares the base ID and differs only in its suffix.
$statfile = $baseid.".stats" ;
$hostfile = $baseid.".hosts" ;
$textfile = $baseid.".texts" ;
$wordfile = $baseid.".words" ;
$menufile = $baseid.".menus" ;
$errrfile = $baseid.".error" ;

# Leave the base ID in "gnbase.pid" in the current directory.  The open is
# checked: if this file cannot be created the report files cannot be
# created either, so stop before any log data is processed.
open(GNBASE, ">", "gnbase.pid") || die "can't open gnbase.pid: $!\n" ;
print GNBASE $baseid ;
close(GNBASE) ;

# Tell the user where each report will be written.
print "Summary file = $statfile\n" ;
print "Word file    = $wordfile\n" ;
print "Menu file    = $menufile\n" ;
print "Error file   = $errrfile\n" ;
print "Text file    = $textfile\n" ;
print "Hosts file   = $hostfile\n" ;
#

# Now process the log data: files named on the command line, or STDIN.
#
#
#  Main collection loop.  Every input line is handed to tally_log_line(),
#  which updates the global counters and tables that the reporting code
#  further down prints.
#

# Position of each weekday abbreviation in @count_by_weekday.
my %weekday_index = ( "Sun", 0, "Mon", 1, "Tue", 2, "Wed", 3,
                      "Thu", 4, "Fri", 5, "Sat", 6 ) ;

# Fragments of (lower-cased) operation text that mark a failed request.
# Trailing blanks are significant.  One alternation replaces a long chain
# of separate matches; "can't exec" also covers "can't exec cg".
my $error_re = join('|', map { quotemeta($_) } (
    "cgi programs ",  "can't exec",     "can't open ca",  "sorry, access",
    "search term r",  "corrupt cache",  "unknown selec",  "unknown serve",
    "can't open po",  "option -s una",  "unknown optio",  "can't open lo",
    "selector synt",  "syntax error ",  "malformed ran",  "search string",
    "can't open fi",  "waisgn is not",  "couldn't exec",  "can't decompr",
    "corrupted cac",  "could not tur",  "could not set",  "accept failed",
    "unable to for",  "http error",     "can't chdir f",  "selector only",
    "no match for ",  "directory not",
)) ;
$error_re = qr/$error_re/ ;

while (<>) {
    tally_log_line($_) ;
}

#
#  tally_log_line(LINE) - account for one log line.
#
#  The line is split on "(" and ")":
#      $line[0]   "host:timestamp"
#      $line[1]   the requested selector / file
#      $line[2]   ":"-separated operation text; $op[1] is the operation
#                 and $op[2] carries the search terms of a search
#  NOTE(review): this layout is read off the parsing below - confirm it
#  against the log format the server really writes.
#
#  Updates the globals $transactions, $first_day, $last_day, the
#  @count_by_* histograms, %site_count, the per-file tables and the
#  $root_menu / $other_menu / $text_file / $range / $search /
#  $access_denied counters.
#
sub tally_log_line {
    $input_line = shift ;
    # chomp, not chop: a final line without a newline keeps its last
    # character, so its operation text still compares equal below.
    chomp($input_line) ;

    # count each transaction
    $transactions++ ;

    # split the incoming line into its pieces
    @line     = split(/[()]/, $input_line) ;
    $no_items = $#line ;

    # the host, and the time stamp that follows the first ":"
    ($host, $when) = split(':', $line[0], 2) ;
    $file = $line[1] ;

    # Date information.  $hour receives the whole "hh:mm:ss" token; used
    # as an array index below it evaluates to its leading number.
    ($dum, $day, $month, $day_no, $hour) = split(/\W*\s+\W*/, $when) ;
    if (length($month) > 1) {
        $first_day = $month." ".$day_no  unless defined($first_day) ;
        $last_day  = $month." ".$day_no ;
    }

    # histogram information
    $count_by_hour[$hour]++ ;
    $count_by_day_no[$day_no]++ ;
    $count_by_weekday[$weekday_index{$day}]++
        if defined($day) && exists($weekday_index{$day}) ;

    # count each remote host, ignoring the case of its name
    $host =~ tr/A-Z/a-z/ ;
    $site_count{$host}++ ;

    @op     = split(':', $line[2]) ;
    $no_ops = $#op ;
    $op[1] =~ tr/A-Z/a-z/ ;

    if ($op[1] eq " sent cache") {
        # Menus (caches): a selector of at most four characters is
        # counted as the root menu, anything longer as a sub-menu.
        if (length($file) > 4) {
            $other_cache{$file}++ ;
            $other_menu++ ;
        }
        else {
            $root_menu++ ;
        }
    }
    elsif ($op[1] eq " sent text") {
        # text files downloaded
        $text_file++ ;
        $text_sent{$file}++ ;
    }
    elsif ($op[1] eq " sent range") {
        # Ranges downloaded.  $range feeds the "Search ranges sent" line
        # of the summary file.
        $range_file{$file}++ ;
        $range++ ;
    }
    elsif ( ($op[1] eq " sent range search") ||
            ($op[1] eq " sent wais search")  ||
            ($op[1] eq " sent grep search") ) {
        # Searches.  $search feeds the "Searches requested" line of the
        # summary file and the gopher.day record.
        $search_file{$file}++ ;
        $search++ ;

        # NOTE(review): the first token of the search text is skipped on
        # purpose here, as the code always has - presumably it is not a
        # search term; confirm against real log lines.
        @word = split(' ', $op[2]) ;
        foreach my $term (@word[1 .. $#word]) {
            $search_word{$term}++ ;
        }
    }

    # Failed requests, keyed by the full lower-cased operation text.
    if ($op[1] =~ $error_re) {
        $no_access{$op[1]}++ ;
        $access_denied++ ;
    }
}
##### end of main collection loop
##### begin the task of generating the needed outputs
#
#  if we have no month name in the input stream
#
# No month name was ever parsed (empty or missing input): use a
# placeholder so the report headers still have something to show.
if (!length($month)) {
    $month = "no data," ;
}
#
#  count sites: one per distinct host name seen in the log
#
$site_no += scalar(keys(%site_count))  if %site_count ;
#
#  begin dumping data *****************

if ( $fedix == 1 ) {
    #
    #  Append one comma-separated summary record to the running daily
    #  log "gopher.day".
    #

    # Month name with its first letter capitalised.
    $month_to_print = ucfirst($month) ;

    # Fields in the order the record format expects them.  The two ' '
    # entries are placeholders; formatted with %d they come out as 0.
    my @day_record = (
        $month_to_print, $day_no,
        $root_menu,
        $site_no,
        $search,
        ' ',
        $other_menu,
        $text_file,
        ' ',
        $transactions,
        $access_denied,
    ) ;

    open(STATS, ">>", "gopher.day") || die "can't open gopher.day\n" ;
    printf(STATS "%s %2d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n", @day_record) ;
    close(STATS) ;
}  # fedix = 1
#
#  print the summary file
#
# The summary cannot be written without its file, so a failed open is fatal.
open(STATS, ">", $statfile) || die "can't open $statfile: $!\n" ;

# Totals.  The counters go through %d so that one which was never
# incremented is reported as 0 rather than as an empty string.
print  STATS "Date               = $day $month $day_no\n" ;
print  STATS "From  $first_day   to  $last_day\n" ;
printf(STATS "Total Transactions = %d\n", $transactions) ;
printf(STATS "Root menu accesses = %d\n", $root_menu) ;
printf(STATS "Sub-menu accesses  = %d\n", $other_menu) ;
printf(STATS "Text files fetched = %d\n", $text_file) ;
printf(STATS "Searches requested = %d\n", $search) ;
printf(STATS "Search ranges sent = %d\n", $range) ;
printf(STATS "Access Denied      = %d\n", $access_denied) ;

#
#  Three histograms go into the summary file: by hour (0-23), by day of
#  the month (0-31) and by weekday.  Only the first has a heading and is
#  followed by a blank line.
#
print STATS "\nHistogram of activity by hour\n" ;
print STATS _histogram_text(\@count_by_hour,    [ 0 .. 23 ]), "\n" ;
print STATS _histogram_text(\@count_by_day_no,  [ 0 .. 31 ]) ;
print STATS _histogram_text(\@count_by_weekday, [ @wk_day_name[0 .. 6] ]) ;
close(STATS) ;

#
#  _histogram_text(COUNTS, LABELS) - render one histogram as text.
#
#  COUNTS  reference to an array of counts; missing entries count as 0
#  LABELS  reference to an array holding one row label per slot; its
#          length decides how many rows are produced
#
#  Returns the "Peak" line, the "Each '*'" scale line and one
#  right-aligned "label - ****" row per slot, as a single string.
#
sub _histogram_text {
    my ($counts, $labels) = @_ ;
    my $last_slot = $#{$labels} ;

    # find the peak - for scaling
    my $peak = 0 ;
    foreach my $slot (0 .. $last_slot) {
        my $count = $counts->[$slot] || 0 ;
        $peak = $count  if $count > $peak ;
    }

    # One '*' stands for $increment transactions; dividing the peak by 68
    # keeps the longest bar shorter than 68 characters.
    my $increment = int($peak / 68) + 1 ;

    my $text = "Peak     = $peak\n"
             . "Each '*' = $increment gopher transactions.\n" ;
    foreach my $slot (0 .. $last_slot) {
        my $count = $counts->[$slot] || 0 ;
        # one star per started group of $increment transactions
        my $stars = int(($count + $increment - 1) / $increment) ;
        $text .= sprintf("%4s - ", $labels->[$slot]) . ("*" x $stars) . "\n" ;
    }
    return $text ;
}
#


#  Search words are tallied next
#
# One line per distinct search word: how often it was used, then the
# word.  A failed open is fatal rather than silently losing the report.
open(WORDS, ">", $wordfile) || die "can't open $wordfile: $!\n" ;
print WORDS "\nSearch words\n" ;
# A hash in boolean context is true when it has entries; the older
# defined(%hash) test no longer compiles on current perls.
if (%search_word) {
    foreach $word (sort keys(%search_word)) {
        printf(WORDS "%4d  %s\n", $search_word{$word}, $word) ;
    }
}
else {
    print WORDS "    <<none>>\n" ;
}
close(WORDS) ;
#
#  now comes the cache - menus listing
#
# One line per non-root menu (cache): how often it was sent, then its
# selector.  The open is checked, and written the same way as the other
# report files.
open(OTHER, ">", $menufile) || die "can't open $menufile: $!\n" ;
print OTHER "\nOther than root menus\n" ;
# An empty table simply yields no lines, so no emptiness test is needed
# (the older defined(%hash) test no longer compiles on current perls).
foreach $cache (sort keys(%other_cache)) {
    printf(OTHER "%4d  %s\n", $other_cache{$cache}, $cache) ;
}
close(OTHER) ;
#
#  Then the text files retrieved
#
# One line per text file sent: number of times sent, total bytes sent
# (times sent x current file size), and the file name.
open(TEXT, ">", $textfile) || die "can't open $textfile: $!\n" ;
print TEXT "\nText files sent\n" ;
# An empty table simply yields no lines (the older defined(%hash) test
# no longer compiles on current perls).
foreach $files (sort keys(%text_sent)) {
    #
    # take the leading protocol info off the name: keep everything from
    # the first "/" on.  A name without any "/" is kept whole; without
    # this check index() returns -1 and only the last character survives.
    #
    $pos       = index($files, "/") ;
    $file_name = ($pos >= 0) ? substr($files, $pos) : $files ;
    #
    # build the fully qualified file name, then get the size of the
    # file - we want to see the number of bytes flowing thru our system.
    # Element 7 of stat() is the size in bytes; a file that cannot be
    # stat'ed counts as 0 bytes.
    #
    $fqfn = $gopher_home.$file_name ;
    $size = (stat($fqfn))[7] || 0 ;
    #
    #  and put it to the file
    #
    printf(TEXT "%4d  %12d %s\n", $text_sent{$files},
                             $text_sent{$files} * $size,
                             $file_name) ;
}
close(TEXT) ;
#
#  Next comes the host information
#
# The twenty busiest hosts, busiest first: request count, then host name.
open(HOSTS, ">", $hostfile) || die "can't open $hostfile: $!\n" ;
print HOSTS "\nHosts accessing the system\n" ;
# A hash in boolean context is true when it has entries; the older
# defined(%hash) test no longer compiles on current perls.
if (%site_count) {
    # Rank inside perl instead of piping formatted lines through
    # "sort -r | head -20": comparing the counts as numbers does not
    # depend on the locale's collation or on the count fitting the
    # four-character column.  Equal counts fall back to reverse name
    # order, the order a reverse sort of the whole line gives.
    my @busiest = sort { $site_count{$b} <=> $site_count{$a} || $b cmp $a }
                       keys(%site_count) ;
    splice(@busiest, 20)  if @busiest > 20 ;
    foreach $site (@busiest) {
        printf(HOSTS "%4d  %s\n", $site_count{$site}, $site) ;
    }
}
close(HOSTS) ;
#
# finally the errors - missing caches, access denied and bad selector
#  attempts
#
# One line per distinct error text: how often it occurred, then the text
# exactly as it was tallied.  A failed open is fatal rather than silently
# losing the report.
open(ERRORS, ">", $errrfile) || die "can't open $errrfile: $!\n" ;
print ERRORS "\nAll error conditions\n" ;
# An empty table simply yields no lines (the older defined(%hash) test
# no longer compiles on current perls).
foreach $err (sort keys(%no_access)) {
    printf(ERRORS "%4d  %s\n", $no_access{$err}, $err) ;
}
close(ERRORS) ;



