#!/usr/local/bin/perl

require 'timelocal.pl';
require 'ctime.pl';

# Command-line handling.
#
#   -start day   first day to include, as a yymmdd stamp (default 0)
#   -stop day    last day to include, as a yymmdd stamp (default 999999)
#   -ndays n     shorthand for "the last n days, today included"
#
# Every argument that does not start with '-' is taken as a log file name
# and handed to the <> operator through @ARGV.
$start = 0;
$stop  = 999999;

@x = split (/\//, $0); $me = pop @x;
$USAGE="$me [-start day] [-stop day] [-ndays n] files ...\n";

# Format an epoch time as the six-digit yymmdd day stamp used in the log.
sub yymmdd {
	my ($epoch) = @_;
	my @t = localtime($epoch);
	# localtime reports the year as "years since 1900", which is 100 or
	# more from 2000 on; keep only the last two digits so the stamp stays
	# six digits wide.
	return sprintf ("%02d%02d%02d", $t[5] % 100, $t[4] + 1, $t[3]);
}

@files = ();
# Test for definedness, not truth, so an argument of "0" or "" does not
# end option processing early.
while (defined ($arg = shift)) {
	unless ($arg =~ /^-/) {
		push (@files, $arg);
		next;
	}
	if ($arg eq '-start') {
		$start = shift;
		die $USAGE unless defined $start;
	}
	elsif ($arg eq '-stop') {
		$stop = shift;
		die $USAGE unless defined $stop;
	}
	elsif ($arg eq '-ndays') {
		$n = shift;
		die $USAGE unless (defined $n && $n =~ /^\d+$/ && $n > 0);
		# "n days" includes today, so go back n-1 whole days.
		$n--;
		$now = time;
		$stop  = &yymmdd ($now);
		$start = &yymmdd ($now - 86400 * $n);
	}
	else {
		die $USAGE;
	}
}

@ARGV = @files;

# Scan the log line by line and accumulate the counters reported below.
#
# A usable line starts with "<who> <day> <time>", where <day> is a yymmdd
# stamp and <time> is hh:mm:ss, optionally followed by a ':' separator.
# Lines dated before $start are ignored and the first line dated after
# $stop ends the scan, so the input is expected to be in date order.
while (<>) {
	next unless (/^\s*([^\s]+)\s+(\d+)\s+([\d:]+).*$/);
	# Copy the captures right away; any later successful match resets them.
	$who = $1;
	$day = $2;
	$hms = $3;
	# chomp rather than chop, so a final line that lacks a newline keeps
	# its last character.
	chomp;
	# Remove only a trailing ':' separator.  A blind chop would eat the
	# last digit of the seconds whenever the separator is missing.
	$hms =~ s/:$//;
	next if ($day < $start);
	last if ($day > $stop);

	# Skip lines whose stamp cannot be parsed, rather than silently
	# reusing the date or time left over from an earlier line.
	next unless ($day =~ /(\d\d)(\d\d)(\d\d)/);
	$yr = $1;
	$mo = $2;
	$da = $3;

	next unless ($hms =~ /(\d\d):(\d\d):(\d\d)/);
	$hr = $1;
	$mi = $2;
	$se = $3;

	$time = &timelocal ($se,$mi,$hr,$da,$mo-1,$yr);

	# Remember the first and last timestamps seen for the report header.
	$start_time = $time unless defined $start_time;
	$stop_time  = $time;

	if (/Processing Query/i) {
		$query_start = $time;
		$query_request_count++;

		# Count the option flags carried on the query line.
		$hash_desc++ if (/#desc/);
		$hash_opaq++ if (/#opaque/);
		$hash_index_error{$1}++		if (/#index error (\d+)/);
		$hash_index_maxresult{$1}++	if (/#index maxresult (\d+)/);
		$hash_index_case{$1}++		if (/#index case (\w+)/);
		$hash_index_matchword++		if (/#index matchword/);

		if (/#END (.*)$/) {
			# Assign first, then strip the carriage return.  Doing it
			# the other way round cleans the previous query, and a
			# successful substitution would reset $1 before it is read.
			$query = $1;
			$query =~ s/\r//;
			if ($query =~ /^".*"$/) {
				$quoted_query++;
			} else {
				$unquoted_query++;
				@terms = split (/\s+/, $query);
				$n = $#terms+1;
				$n_query_terms{$n}++;
			}
			$query_op{'AND'}++	if ($query =~ /\s+and\s+/i);
			$query_op{'OR'}++	if ($query =~ /\s+or\s+/i);
		}
	}
	elsif (/Query returned (\d+)/i) {
		$num_results = $1;
		$query_stop = $time;
		$query_result_count++;

		# Response time goes into 10-second bins, with bin 14 collecting
		# everything slower.  A result whose request was never seen
		# (for example, cut off by -start) cannot be timed, so it is
		# left out of the histogram instead of landing in the last bin.
		if (defined $query_start) {
			$dt = $query_stop - $query_start;
			$bin = int ($dt / 10);
			$bin = 14 if ($bin > 14);
			$QueryTime{$bin}++;
		}

		# Result counts go into logarithmic bins, five per decade, with
		# bin 13 collecting everything larger; zero has its own bin.
		if ($num_results > 0) {
			$bin = int (5 * log ($num_results) / log(10));
			$bin = 13 if ($bin > 13);
		} else {
			$bin = 'zero';
		}
		$NumResults{$bin}++;
	}
	elsif (/query is invalid/i) {
		$query_stop = $time;
		$query_invalid_count++;
	}
	elsif (/Client is gone/i) {
		$query_stop = $time;
		$query_aborted_count++;
	}
	elsif (/exceeded lifetime/i) {
		$query_lifetime_count++;
	}
}

# Report header: the period covered by the log and the overall counts of
# query requests and their outcomes.

# Without a single usable log line there is no period to describe, and the
# arithmetic below would die with a division by zero.
unless (defined $start_time) {
	die "$me: no log entries found between $start and $stop\n";
}

# ctime returns its text with a trailing newline; chop removes it.
chop ($T1 = &ctime($start_time));
chop ($T2 = &ctime($stop_time));

my $elapsed = $stop_time - $start_time;

printf ("Broker Logfile Analysis for the %4.1f day period, starting\n",
    $elapsed / 86400);
printf ("%s, and ending %s\n", $T1, $T2);

print "\n";

printf ("%20s: %5d\n",
	'Query Requests',	$query_request_count);

# Every figure from here on is a share of the request count, so stop
# cleanly when no request was logged.
unless ($query_request_count) {
	print "\nNo query requests were logged in this period.\n";
	exit 0;
}

my @outcome_rows = (
	'Query Results',	$query_result_count,
	'Invalid Queries',	$query_invalid_count,
	'Aborted Queries',	$query_aborted_count,
	'Lifetime Exceeded',	$query_lifetime_count,
);
while (my ($label, $count) = splice (@outcome_rows, 0, 2)) {
	# Adding 0.5 before %d truncates rounds the percentage to nearest.
	printf ("%20s: %5d (%2d%%)\n",
		$label, $count,
		100 * $count / $query_request_count + 0.5);
}

print "\n";

if ($elapsed > 0) {
	printf ("Averaged %.1f queries received per day\n",
		86400 * $query_request_count / $elapsed);
} else {
	# All entries carry the same timestamp, so no rate can be computed.
	print "Average query rate not available: log spans less than one second\n";
}

print "\n";

# Histogram of query response times: fifteen 10-second bins, each drawn
# as a bar scaled so that the fullest bin is 60 characters wide, followed
# by the bin's share of all timed queries.
print <<EOF;
seconds                     QUERY RESPONSE TIME                  percent
------- ------------------------------------------------------------ ---
EOF

# Keep the peak and the total in their own variables rather than as extra
# 'max'/'sum' keys inside the histogram hash.
my $qtime_peak  = 0;
my $qtime_total = 0;
foreach my $hits ( values %QueryTime ) {
	$qtime_peak = $hits if ($qtime_peak < $hits);
	$qtime_total += $hits;
}

for my $bin (0 .. 14) {
	# With no timed queries every bar is empty; this also avoids a
	# division by zero.
	my $bar = $qtime_peak ? int (60 * $QueryTime{$bin} / $qtime_peak) : 0;
	if ($bin < 14) {
		printf ("%3d-%3d ", $bin*10, ($bin+1)*10-1);
	} else {
		# The last bin collects every query that took 140 s or more.
		printf ("%6d+ ", $bin*10);
	}
	print '*' x $bar;
	print ' ' x (60-$bar);
	printf (" %2d%% ",
		$qtime_total ? 100 * $QueryTime{$bin} / $qtime_total + 0.5 : 0);
	print "\n";
}

print "\n";

# Histogram of the number of objects matched per query.  Zero matches has
# its own row; positive counts fall into logarithmic bins, five per
# decade.  Bars are scaled so that the fullest bin is 40 characters wide
# and are padded out to the 60-column bar area of the header.
print <<EOF;
count                     NUMBER OF MATCHED OBJECTS              percent
------- ------------------------------------------------------------ ---
EOF

# Keep the peak and the total in their own variables rather than as extra
# 'max'/'sum' keys inside the histogram hash.
my $nres_peak  = 0;
my $nres_total = 0;
foreach my $hits ( values %NumResults ) {
	$nres_peak = $hits if ($nres_peak < $hits);
	$nres_total += $hits;
}

foreach my $bin ('zero', 0..13) {
	# With no results every bar is empty; this also avoids a division
	# by zero.
	my $bar = $nres_peak ? int (40 * $NumResults{$bin} / $nres_peak) : 0;
	if ($bin eq 'zero') {
		printf ("      0 ");
	} elsif ($bin == 13) {
		# The last bin collects every count from its lower bound upwards.
		printf ("%6d+ ", &ciel (10**($bin/5)));
	} else {
		# Bin i covers the integers in [10**(i/5), 10**((i+1)/5)).
		my $lo = &ciel (10**($bin/5));
		my $hi = &ciel (10**(($bin+1)/5)) - 1;
		printf ("%3d-%3d ", $lo, $hi);
	}
	print '*' x $bar;
	print ' ' x (60-$bar);
	printf (" %2d%% ",
		$nres_total ? 100 * $NumResults{$bin} / $nres_total + 0.5 : 0);
	print "\n";
}


# Query-options section: how many query requests carried each option
# flag, as a count and as a (truncated) percentage of all requests.
print "\nQuery Options ($query_request_count total queries):\n";

# Fixed rows first, as label/count pairs.
my @option_rows = (
	'description',	$hash_desc,
	'opaque',	$hash_opaq,
	'matchword',	$hash_index_matchword,
);
while (my ($label, $count) = splice (@option_rows, 0, 2)) {
	printf ("%20s: %5d %2d%%\n",
		$label, $count, 100 * $count / $query_request_count);
}

# One row per distinct "#index error" value, in numeric order.
for my $code ( sort numcmp keys %hash_index_error ) {
	my $hits = $hash_index_error{$code};
	printf ("%20s: %5d %2d%%\n",
		"error $code", $hits, 100 * $hits / $query_request_count);
}

# One row per distinct "#index maxresult" value, in numeric order.
for my $limit ( sort numcmp keys %hash_index_maxresult ) {
	my $hits = $hash_index_maxresult{$limit};
	printf ("%20s: %5d %2d%%\n",
		"maxresult $limit", $hits, 100 * $hits / $query_request_count);
}

# One row per distinct "#index case" word, in string order.
for my $mode ( sort keys %hash_index_case ) {
	my $hits = $hash_index_case{$mode};
	printf ("%20s: %5d %2d%%\n",
		"case $mode", $hits, 100 * $hits / $query_request_count);
}



# Query-terms section: quoted versus unquoted queries, the distribution
# of term counts among unquoted queries, and how many queries used the
# AND / OR operators.  Percentages are truncated shares of all requests.
printf ("\nQuery Terms ($query_request_count total queries):\n");
printf ("%20s: %5d %2d%%\n",
	'quoted query',
	$quoted_query,
	100 * $quoted_query / $query_request_count);
printf ("%20s: %5d %2d%%\n",
	'unquoted query',
	$unquoted_query,
	100 * $unquoted_query / $query_request_count);

# Term counts are numbers, so list them in numeric order.
foreach my $n_terms ( sort numcmp keys %n_query_terms ) {
	printf ("%20s: %5d %2d%%\n",
		$n_terms . ' query terms  ',
		$n_query_terms{$n_terms},
		100 * $n_query_terms{$n_terms} / $query_request_count);
}

# The operator keys are the words 'AND' and 'OR'.  Compared numerically
# they are both 0, which leaves their order to chance, so sort them as
# strings.
foreach my $op ( sort keys %query_op ) {
	printf ("%20s: %5d %2d%%\n",
		"queries with '$op'",
		$query_op{$op},
		100 * $query_op{$op} / $query_request_count);
}


print <<EOF;

NOTE: count of query terms includes operations, so "this AND that"
counts as three query terms.
EOF



exit 0;


# Return the largest integer that is not greater than the argument.
#
# int() truncates toward zero, which is already the floor for values
# >= 0 but one too high for negative non-integers, so those are stepped
# down by one.  The argument is held in a lexical so the caller's global
# $x is left alone.
sub floor {
	my ($value) = @_;
	my $whole = int($value);
	return ($value < $whole) ? $whole - 1 : $whole;
}

# Return the smallest integer that is not less than the argument
# (a ceiling function; the historical spelling of the name is kept
# because the script calls it as &ciel).
#
# int() truncates toward zero, which is already the ceiling for negative
# values, so only positive non-integers are stepped up by one.  The
# argument is held in a lexical so the caller's global $x is left alone.
sub ciel {
	my ($value) = @_;
	my $whole = int($value);
	return ($value > $whole) ? $whole + 1 : $whole;
}

# Comparison routine for sort(): orders the pair that sort places in
# $a and $b numerically, smallest first.
sub numcmp {
	return $a <=> $b;
}
