#!/usr/bin/perl -w

use PPI::Xref;

use strict;
use warnings;

use File::Basename qw[basename];
my $ME = basename($0);

use Getopt::Long;
Getopt::Long::Configure(qw[no_auto_abbrev]);

sub short_help_string {
    return <<__EOU__;
$ME: Usage:
$ME [--files|--subs|--packages|--modules|--missing_modules|--parse_errors] ...
$ME [--subs_files|--packages_files] ...
$ME [--files_lines|--total_lines] ...
$ME [--incs_files|--incs_chains|--incs_chains_reverse|--incs_deps] ...
$ME [--files_founts|--modules_counts] ...
$ME [--code=... [--auto_semicolon]]
$ME --parse
$ME --report_all
$ME --report_title
$ME --report_prefix=prefix
$ME --INC=dir,dir,dir
$ME --cache_directory=dir
$ME [--process_verbose|--recurse_verbose|--cache_verbose|--summary]
$ME [--separator=s|--column|--finish]
$ME --recurse|--norecurse
$ME [--help|--longhelp]
__EOU__
}

sub help {
    die short_help_string();
}

sub long_help {
    print short_help_string();
    die <<__EOU__;

The ... is either a list of files, or --code='...', or --files_from_file=file,
or --files_from_cache, or --files_from_system.

If --code is used, the filename is faked to be '-' in the reports.
The --files_from_file=file specifies a file to read the filenames
from, filename per line.  If the filename is '-', STDIN is used.

Note that the --code argument must be complete, e.g. 'use utf8;' must
have the semicolon.  For convenience, a semicolon is automatically added
unless there already is one, or the code argument ends in an end brace.
This can be disabled with --noauto_semicolon.

The --parse can be used to just parse the inputs (and possibly cache
the results, if --cache_directory is specified) but not to output
anything (except the --*verbose or --summary output).  One or more
of --parse or one of the report options (--packages et cetera,
read on) must be specified.

An "inc" is a "file inclusion", any of the use/no/require/do.

* --packages shows seen package statements, while the --modules shows
  module names included via use/no/require.

* --missing_modules (default: on) shows the missing modules (in addition
  to warning about them during scan).  Use --nomissing_modules to suppress.

* --parse_errors (default: on) shows the parse errors modules (in addition
  to warning about them during scan).  Use --noparse_errors to suppress.

* --subs_files, --packages_files, and --incs_files by default show only
  the start line, with --finish they show also the finish, and with
  --column also the column(s).  The --incs_chains only shows the start lines.

* --incs_files shows only the first level, the --incs_chains shows the
  full inclusion chains.  Warning: the number of inclusion chains can be
  even for the simplest codes.  Enjoy responsibly.

* --incs_chains_reverse instead of --incs_chains generates reversed
  inclusion chains, for inspecting reverse dependencies.

* --total_lines tells the total number of lines seen, while the
  --files_lines tells the lines per file.

* --files_counts and --modules_counts show how many times each file
  or module is referred to (first-level).

* --incs_deps shows what kind of tree node (root, branch, leaf, singleton)
  each file is in the total dependency tree.  It does not attempt to draw
  the actual tree graph.

* --report_all turns on all the reports.  By default these reports go
  to STDOUT, with --report_prefix you can direct them to files starting
  with the specified prefix.  The report titles ("===...===") can be
  turned off with --noreport_title.

The --cache_directory=dir can be used to cache the PPI processing results
under a directory.  This is usually much faster than processing the
files from scratch with PPI.  For paranoia, the directory must exist.

The --INC=dir,dir,dir can be used to specify an \@INC different from the
standard one.  Note the comma-separation.

The --separator default is the tabulator.

The default is to recurse through use/no/require/do statements,
use --norecurse to not to.

The --summary (default: on) turns on summary report at the end.
The --processverbose turns on display on processing progress.
The --cacheverbose turns on verbose cache access.
__EOU__
}

my %Opt = (
    separator => "\t",
    recurse => 1,
    report_title => 1,
    missing_modules => 1,
    parse_errors => 1,
    auto_semicolon => 1,
 );

help() unless GetOptions('files' => \$Opt{files},
                         'subs' => \$Opt{subs},
                         'packages' => \$Opt{packages},
                         'modules' => \$Opt{modules},
                         'missing_modules' => \$Opt{missing_modules},
                         'parse_errors' => \$Opt{parse_errors},
                         'total_lines' => \$Opt{total_lines},
                         'files_lines' => \$Opt{files_lines},
                         'subs_files' => \$Opt{subs_files},
                         'packages_files' => \$Opt{packages_files},
                         'incs_files' => \$Opt{incs_files},
                         'incs_chains' => \$Opt{incs_chains},
                         'incs_chains_reverse' => \$Opt{incs_chains_reverse},
                         'files_counts' => \$Opt{files_counts},
                         'modules_counts' => \$Opt{modules_counts},
                         'incs_deps' => \$Opt{incs_deps},
                         'parse' => \$Opt{parse},
                         'report_all' => \$Opt{report_all},
                         'report_prefix=s' => \$Opt{report_prefix},
                         'INC=s' => \$Opt{INC},
                         'cache_directory=s' => \$Opt{cache_directory},
                         'code=s' => \$Opt{code},
                         'auto_semicolon' => \$Opt{auto_semicolon},
                         'files_from_file=s' => \$Opt{files_from_file},
                         'files_from_cache' => \$Opt{files_from_cache},
                         'files_from_system' => \$Opt{files_from_system},
                         'process_verbose' => \$Opt{process_verbose},
                         'recurse_verbose' => \$Opt{recurse_verbose},
                         'cache_verbose' => \$Opt{cache_verbose},
                         'summary' => \$Opt{summary},
                         'separator=s' => \$Opt{separator},
                         'recurse!' => \$Opt{recurse},
                         'column' => \$Opt{column},
                         'finish' => \$Opt{finish},
                         'help' => \$Opt{help},
                         'longhelp' => \$Opt{longhelp});

help()if $Opt{help};
long_help() if $Opt{longhelp};

my @ACTION = qw[files subs packages modules missing_modules parse_errors
                subs_files packages_files files_lines total_lines
                files_counts modules_counts
                incs_files incs_deps incs_chains incs_chains_reverse];
                       
if ($Opt{report_all}) {
    @Opt{@ACTION} = (1) x @ACTION;
}

use List::Util qw[any];
help() unless (any { $Opt{$_} } @ACTION) || $Opt{parse};

$Opt{separator} = "\t" if $Opt{separator} eq '\t';  # Nicer to type in shells.

if ($Opt{process_verbose} ||
    $Opt{recurse_verbose} ||
    $Opt{cache_verbose} ||
    $Opt{summary}) {
    select(STDOUT); $| = 1;
}

my $xref_opt = {
   process_verbose => $Opt{process_verbose},
   recurse_verbose => $Opt{recurse_verbose},
   cache_verbose => $Opt{cache_verbose},
   recurse => $Opt{recurse},
};

if (defined $Opt{INC}) {
    $xref_opt->{INC} = [ split(',', $Opt{INC}) ];
}

if (defined $Opt{cache_directory}) {
  unless (-d $Opt{cache_directory} && -r $Opt{cache_directory} && -w $Opt{cache_directory}) {
    warn "$ME: Not a read-write directory --cache_directory='$Opt{cache_directory}'\n";
  }
  $xref_opt->{cache_directory} = $Opt{cache_directory};
}

my $xref = PPI::Xref->new($xref_opt);

package Times {
    use Time::HiRes ();
    sub get { bless [ Time::HiRes::time(), times() ] }
    sub diff {
        my @d = map { $_[0]->[$_] - $_[1]->[$_] } 0..2;
        return (@d, $d[1] + $d[2]);
    }
}

my $t0 = Times->get();

if (defined $Opt{files_from_file}) {
    my $fh;
    if ($Opt{files_from_file} eq '-') {
        $fh = *STDIN;
    } elsif (!open($fh, '<', $Opt{files_from_file})) {
        warn "$ME: Failed to open --files_from_file='$Opt{files_from_file}': $!\n";
    }
    use Scalar::Util qw[openhandle];
    if (openhandle($fh)) {
        while (<$fh>) {
            chomp;
            unless ($xref->process($_)) {
                warn "$ME: Failed to process '$_'\n";
            }
        }
    } else {
        help();
    }
}
if (defined $Opt{files_from_cache}) {
    $xref->process_files_from_cache();
}
if (defined $Opt{files_from_system}) {
    $xref->process_files_from_system();
}
if (defined $Opt{code}) {
    if ($Opt{auto_semicolon}) {
        unless ($Opt{code} =~ /[;}]\s*$/) {
            $Opt{code} .= ';';
        }
    }
    $xref->process(\$Opt{code});
}
if (@ARGV) {
    $xref->process(@ARGV);
}

my $t1 = Times->get();

sub emit {
    my ($xref, $opt, $cb) = @_;
    my $fh;
    if (defined $Opt{report_prefix}) {
        my $fn = $Opt{report_prefix} . "$opt.txt";
        open($fh, ">", $fn) or die qq[$0: Failed to create "$fn": $!\n];
        print "$ME: Created $fn\n";
    } else {
        $fh = *STDOUT;
    }
    my $report_title = $Opt{report_title};;
    if (($opt eq 'missing_modules' && !$xref->missing_modules) ||
        ($opt eq 'parse_errors' && !$xref->parse_errors_files)) {
        $report_title = 0;
    }
    print { $fh } "=== $opt ===\n" if $report_title;
    $cb->($xref, $fh);
}

unless ($Opt{parse}) {
    my %cb = (
        files => sub {
            my ($xref, $fh) = @_;
            for my $f ($xref->files) {
                print { $fh } $f, "\n";
            }
        },
        subs => sub{
            my ($xref, $fh) = @_;
            for my $s ($xref->subs) {
                print { $fh } $s, "\n";
            }
        },
        packages => sub {
            my ($xref, $fh) = @_;
            for my $p ($xref->packages) {
                print { $fh } $p, "\n";
            }
        },
        modules => sub {
            my ($xref, $fh) = @_;
            for my $p ($xref->modules) {
                print { $fh } $p, "\n";
            }
        },
        total_lines => sub {
            my ($xref, $fh) = @_;
            print { $fh } $xref->total_lines, "\n";
        },
        files_lines => sub {
            my ($xref, $fh) = @_;
            for my $f ($xref->files) {
                print { $fh } $f, $Opt{separator}, $xref->file_lines($f), "\n";
            }
        },
        files_counts => sub {
            my ($xref, $fh) = @_;
            for my $f ($xref->files) {
                print { $fh } $f, $Opt{separator}, $xref->file_count($f), "\n";
            }
        },
        modules_counts => sub {
            my ($xref, $fh) = @_;
            for my $f ($xref->modules) {
                print { $fh } $f, $Opt{separator}, $xref->module_count($f), "\n";
            }
        },
        missing_modules => sub {
            my ($xref, $fh) = @_;
            for my $m ($xref->missing_modules) {
                print { $fh } $m, $Opt{separator}, $xref->missing_module_count($m), $Opt{separator}, join($Opt{separator}, $xref->missing_module_lines($m)), "\n";
            }
        },
        parse_errors => sub {
            my ($xref, $fh) = @_;
            for my $f ($xref->parse_errors_files) {
                my %e = $xref->file_parse_errors($f);
                my @e = map { defined $e{$_} ? "$e{$_}" : $_ }
                        sort { $a cmp $b || $e{$a} cmp $e{$b} }
                        sort keys %e;
                print { $fh } $f, $Opt{separator}, "@e\n";
            }
        },
        incs_deps => sub {
            my ($xref, $fh) = @_;
            my $id = $xref->incs_deps;
            for my $f ($id->files) {
                print { $fh } $f, $Opt{separator}, $id->file_kind($f), "\n";
            }
        },
        subs_files => sub {
            my ($xref, $fh) = @_;
            my $sfi = $xref->subs_files_iter({separator => $Opt{separator},
                                              column => $Opt{column},
                                              finish => $Opt{finish}});
            while (my $sf = $sfi->next) {
                print { $fh } $sf->string, "\n";
            }
        },
        packages_files => sub {
            my ($xref, $fh) = @_;
            my $pfi = $xref->packages_files_iter({separator => $Opt{separator},
                                                  column => $Opt{column},
                                                  finish => $Opt{finish}});
            while (my $pf = $pfi->next) {
                print { $fh } $pf->string, "\n";
            }
        },
        incs_files => sub {
            my ($xref, $fh) = @_;
            my $ifi = $xref->incs_files_iter({separator => $Opt{separator},
                                              column => $Opt{column},
                                              finish => $Opt{finish}});
            while (my $if = $ifi->next) {
                print { $fh } $if->string, "\n";
            }
        },
        incs_chains => sub {
            my ($xref, $fh) = @_;
            my $ici = $xref->incs_chains_iter({separator => $Opt{separator}});
            while (my $ic = $ici->next) {
                print { $fh } $ic->string, "\n";
            }
        },
        incs_chains_reverse => sub {
            my ($xref, $fh) = @_;
            my $ici = $xref->incs_chains_iter({separator => $Opt{separator},
                                               reverse_chains => 1});
            while (my $ic = $ici->next) {
                print { $fh } $ic->string, "\n";
            }
        },
    );
    for my $opt (@ACTION) {
        if ($Opt{$opt}) {
            die qq[$ME: Unexpected callback '$opt'\n] unless defined $cb{$opt};
            emit($xref, $opt, $cb{$opt});
        }
    }
}

my $t2 = Times->get();

if ($Opt{summary}) {
    my ($dw1, $du1, $ds1, $dc1) = $t1->diff($t0);
    printf("$ME: files=%d lines=%d subs=%d\n",
                 $xref->files || 0,
                 $xref->total_lines || 0,
                 $xref->subs || 0);
    printf("$ME: docs_created=%d cache reads=%d writes=%d updates=%d creates=%d deletes=%d\n",
                 $xref->docs_created,
                 $xref->cache_reads,
                 $xref->cache_writes,
                 $xref->cache_updates,
                 $xref->cache_creates,
                 $xref->cache_deletes);
    printf("$ME: parse: wall=%.2fs user=%.2fs system=%.2fs cpu=%.2fs\n",
           $dw1, $du1, $ds1, $dc1);
    my ($dw2, $du2, $ds2, $dc2) = $t2->diff($t1);
    printf("$ME: output: wall=%.2fs user=%.2fs system=%.2fs cpu=%.2fs\n",
           $dw2, $du2, $ds2, $dc2);
}

exit(0);
