#!/usr/bin/perl

use strict;
use Time::HiRes qw(time);

use Syntax::Kamelon;

# Formatter configuration handed to Syntax::Kamelon->new further down.

# Attribute names as returned by Syntax::Kamelon->AvailableAttributes.
my @attributes = Syntax::Kamelon->AvailableAttributes;

# attribute name => [opening tag, closing tag]; each attribute is wrapped in
# a <font> element whose class is the attribute name.
my %formtab = map {
	($_ => ["<font class=\"$_\">", "</font>"])
} @attributes;

# Literal text => HTML replacement, passed as the formatter's substitutions.
my $substitutions = {
	"\n" => "<BR>\n",
	"\t" => '&nbsp;&nbsp;&nbsp;',
	' ' => '&nbsp;',
	'&' => '&amp;',
	'<' => '&lt;',
	'>' => '&gt;',
};

# Benchmark settings; each one can be overridden on the command line.
my $cycles = 5;     # number of passes over every sample file
my $mode = 'text';  # validated and echoed; not used elsewhere in the code visible here
my $verbose = 0;    # forwarded to Syntax::Kamelon->new as 'verbose'

# Handlers for the short options. A handler that needs a value takes it from
# the front of @ARGV and dies when the value is missing or invalid.
my %shortargs = (
	'-c' => sub {
		$cycles = shift @ARGV;
		# Digits only and greater than zero, as the message promises.
		unless (defined $cycles and $cycles =~ /\A[0-9]+\z/ and $cycles > 0) {
			die '-c or -cycles should have a positive integer value'
		}
	},
	'-m' => sub {
		$mode = shift @ARGV;
		# The alternation is grouped so that both anchors apply to every
		# alternative; ungrouped, /^text|line|debug$/ also accepts values
		# such as 'textual', 'online' or 'xdebug'.
		unless (defined $mode and $mode =~ /\A(?:text|line|debug)\z/) {
			die '-m or -mode can be text, line or debug'
		}
	},
	'-v' => sub { $verbose = 1 },
);

# Full option table: every long spelling shares its short option's handler.
my %args = (%shortargs,
	'-cycles' => $shortargs{'-c'},
	'-mode' => $shortargs{'-m'},
	'-verbose' => $shortargs{'-v'},
);

# Consume @ARGV front to back, dispatching each option to its handler.
while (@ARGV) {
	my $t = shift @ARGV;
	unless (exists $args{$t}) {
		die "invalid argument $t, valid arguments -c, -cycles, -m, -mode, -v, -verbose";
	}
	# Explicit call with an empty argument list (a bare &$call would pass
	# the current @_ along implicitly).
	$args{$t}->();
}

# Echo the effective settings before the benchmark starts.
print "\nMode: $mode\n";
print "Cycles: $cycles\n";
print "Verbose: $verbose\n\n";


# Benchmark corpus: syntax name => sample file.
# The key is the name handed to $k->Syntax(); the value is the file name that
# the benchmark loop opens below the samples/ directory.
# Some syntaxes share one sample file (highlight.asm, highlight.hs and
# highlight.m each appear more than once).
my %langs = (
	'.desktop' => "highlight.desktop",
	'AHDL' => "highlight.ahdl",
	'ASP' => "highlight.asp",
	'AVR Assembler' => "highlight.asm",
	'AWK' => "highlight.awk",
	'Bash' => "highlight.sh",
	'BibTeX' => "highlight.bib",
	'C++' => "highlight.cpp",
	'CMake' => "highlight.cmake",
	'CSS' => "highlight.css",
	'Clipper' => "highlight.prg",
	'Common Lisp' => "highlight.lisp",
	'Doxygen' => "highlight.dox",
	'Eiffel' => "highlight.e",
	'Euphoria' => "highlight.exu",
	'Fortran' => "highlight.f90",
	'GLSL' => "highlight.glsl",
	'HTML' => "highlight.html",
	'Haskell' => "highlight.hs",
	'Intel x86 (NASM)' => "highlight.asm",
	'JSP' => "highlight.jsp",
	'Java' => "highlight.java",
	'JavaScript' => "highlight.js",
	'LaTeX' => "highlight.tex",
	'Lex/Flex' => "highlight.lex",
	'LilyPond' => "highlight.ly",
	'Literate Haskell' => "highlight.hs",
	'Matlab' => "highlight.m",
	'Octave' => "highlight.m",
	'PHP/PHP' => "highlight.php",
	'POV-Ray' => "highlight.pov",
	'Perl' => "highlight.pl",
	'PicAsm' => "highlight.asm",
	'Pike' => "highlight.pike",
	'PostScript' => "highlight.ps",
	'PureBasic' => "highlight.pb",
	'Python' => "highlight.py",
	# NOTE(review): 'Quake Script' points at a .rib sample - confirm this pairing is intended
	'Quake Script' => "highlight.rib",
	'Ruby' => "highlight.rb",
	'Scheme' => "highlight.scheme",
	'Spice' => "highlight.sp",
	'Stata' => "highlight.do",
	'Tcl/Tk' => "highlight.tcl",
	'UnrealScript' => "highlight.uc",
	'VRML' => "highlight.wrl",
	'XML' => "highlight.xml",
	'xslt' => "highlight.xsl",
);

# Highlighter under test, configured with the 'Base' formatter and the
# substitution / format tables built above.
my $k = Syntax::Kamelon->new(
	verbose => $verbose,
	formatter => ['Base',
		substitutions => $substitutions,
		format_table => \%formtab,
	],
);

# Syntaxes are benchmarked in sorted name order.
my @langl = sort keys %langs;

# Per-syntax statistics, keyed by syntax name:
#   size  - summed length() of the sample's lines, counted on the first cycle only
#   hlmax - longest single Parse call, in seconds
#   hlmin - shortest single Parse call, in seconds ('' until a line was timed)
#   hltot - time spent in Parse, summed over all lines and all cycles
my %results = ();

foreach my $ln (@langl) {
	my $fl = $langs{$ln};
	my $path = "samples/$fl";
	for my $cycle (1 .. $cycles) {
		print "testing language $ln\n";
		# The record is created on the first cycle and reused afterwards.
		my $r = $results{$ln} ||= {
			size =>  0,
			hlmax => 0,
			hlmin => '',
			hltot => 0,
		};
		$k->Syntax($ln);
		$k->Reset;
		# Three-argument open on a lexical handle; the message carries the
		# full path and the system error so a missing sample can be diagnosed.
		open(my $tst, '<', $path) or die "cannot open $path: $!";
		while (my $txt = <$tst>) {
			if ($cycle == 1) {
				$r->{'size'} += length($txt);
			}
			# Only the Parse call itself is timed (time is Time::HiRes::time).
			my $start1 = time;
			my @dummy = $k->Parse($txt);	# list context, result discarded
			my $hlbench = time - $start1;
			$r->{'hltot'} += $hlbench;
			if ($hlbench > $r->{'hlmax'}) {
				$r->{'hlmax'} = $hlbench;
			}
			if (($r->{'hlmin'} eq '') or ($hlbench < $r->{'hlmin'})) {
				$r->{'hlmin'} = $hlbench;
			}
			# NOTE(review): Reset is called after every line, exactly as the
			# benchmark has always done - confirm that dropping the
			# highlighter state between lines is intended.
			$k->Reset;
		}
		close $tst;
		# hltot covers all cycles so far, hence the size is scaled by the
		# number of completed cycles.
		my $cs = $cycle * $r->{'size'};
		my $hltp = 0;
		if ($r->{'hltot'} > 0) {
			$hltp = int($cs / $r->{'hltot'});
		}
		print "highlighting hltot " . $r->{'hltot'} . " seconds, avg throughput $hltp bytes/second\n" ;
	}
}

# Report label per statistics prefix. Only 'hl' is reported: the benchmark
# loop collects hlmin/hlmax/hltot around $k->Parse and no 'fm' timings.
# The labels are single-quoted, so '$' needs no backslash (a backslash here
# would be printed literally).
my %modes = (
	hl => '$k->Parse',
	fm => '$kate->HighlightText',
);

print "\n====================================================\n";
print "BENCHMARK REPORT FOR Syntax::Kamelon\n";
print "====================================================\n\n";
foreach my $md ('hl') {
	my $mt = $modes{$md};
	print "Report for $mt, done $cycles iterations.\n\n";
	print "language        size        min         max         total       throughput\n";
	print "                testfile    time        time        time        (bytes/second)\n\n";

	# Offsets at which the size, min, max, total and throughput columns start.
	my @columns = (16, 28, 40, 52, 64);

	# Lay out one table row: every cell but the last is padded to the start
	# of the next column. A blank is appended before padding so that a cell
	# filling its whole column (for example a 16 character syntax name) is
	# still separated from its neighbour.
	my $row = sub {
		my @cells = @_;
		my $line = '';
		for my $i (0 .. $#cells) {
			$line .= $cells[$i];
			if ($i < $#cells) {
				$line = addspaces("$line ", $columns[$i]);
			}
		}
		return $line;
	};

	my $mintot = 0;
	my $maxtot = 0;
	my $tottot = 0;
	my $siztot = 0;
	foreach my $l (sort keys %results) {
		my $r = $results{$l};
		my $min = $r->{$md . 'min'};
		my $max = $r->{$md . 'max'};
		my $tot = $r->{$md . 'tot'};
		$siztot += $r->{'size'};
		$mintot += $min;
		$maxtot += $max;
		$tottot += $tot;
		# "%4f": minimum field width 4 with sprintf's default 6 decimals.
		my $txt = $row->(
			$l,
			$r->{'size'},
			sprintf("%4f", $min),
			sprintf("%4f", $max),
			sprintf("%4f", $tot),
			calcaverage($r->{'size'}, $tot),
		);
		print "$txt\n";
	}
	print "\n";

	# Sums over all reported syntaxes.
	my $txtt = $row->(
		'Totals',
		$siztot,
		sprintf("%4f", $mintot),
		sprintf("%4f", $maxtot),
		sprintf("%4f", $tottot),
		calcaverage($siztot, $tottot),
	);
	print "$txtt\n";

	# Per-syntax averages; this row has no throughput column.
	my $numlang = @langl;
	my $txta;
	if ($numlang > 0) {
		$txta = $row->(
			'Averages',
			int($siztot/$numlang),
			sprintf("%4f", $mintot/$numlang),
			sprintf("%4f", $maxtot/$numlang),
			sprintf("%4f", $tottot/$numlang),
		);
	} else {
		$txta = addspaces('Averages', 16);
	}
	print "$txta\n";
	print "====================================================\n\n";
}

# addspaces($text, $width)
# Returns $text with blanks appended until it is at least $width characters
# long. Text that already reaches $width is returned unchanged.
sub addspaces {
	my ($text, $width) = @_;
	$text .= ' ' while length($text) < $width;
	return $text;
}

# calcaverage($size, $time)
# Returns the throughput int(($cycles * $size) / $time), where $cycles is the
# file-level cycle count, or the string 'division by 0' when no time was
# measured.
sub calcaverage {
	my ($size, $time) = @_;
	# Numeric test: a string comparison with 0 lets values such as '0.0',
	# '0e0' or undef through to the division, which then dies.
	if (!defined($time) || $time == 0) {
		return 'division by 0';
	}
	return int(($cycles * $size) / $time);
}
