########################################################################
# sgmlspl script for a *supplemented* version of SGMLS.pm that, when
#    reading an nsgmls ESIS with empty tag labels, provides detection
#    of defined-empty elements with the method  $element->defempty .
#
# Document Type: article (GELLMU)
# Output: text/xml (non-public -- suffix .zml)
# Edited by: William F. Hammond
# Begun: 21 November 1998
#
# Prepare GELLMU article for a subsequent non-validating parse
# for processing to XHTML + MathML by (a) wrapping all math CDATA
# islands in a container "Cd0" and (b) providing the following
# filled-in attributes for all elements inside one of the 5 math
# containers (math, tmath, displaymath, equation, eqnarray)
########################################################################

$WhoAmI = "mathcdata.pl";
print STDERR $0, " *** ", $WhoAmI, "\n";

use utf8;
$utf8On = 1;
$pversion=$];
if($pversion >= 5.008001){
    use encoding "utf8";   # so input strings are parsed by unicode char
}
else{
    print STDERR "*** WARNING *** Installed version of Perl (", $pversion,
    ")\n", "    may not adequately recognize UTF-8 content in math mode\n",
    "    Version 5.8.1 or greater should be OK.\n";
};

use SGMLS;                      # Use the SGMLS package.
use SGMLS::Output;              # Use stack-based output.

# Global variables
$eltser=0;
$eltdepth=0;
$encoding="UTF-8";
$gmath = 0;     # Inside math or not?
$maxeltdepth=0;
$mdpth = 0;         # math depth
$mlvl = 0;          # math level
@mseq = ();         # sibling sequence at current math level, i.e.,
$mseq[0] = 0;       #   $mseq[$mlvl] = seq no of curr elt at curr level
                    #   attr mseq="3" means elt is seq 3 at its level
@mchld = ();        # name of element at given level and sequence, i.e.,
$mchld[0][0] = "";  #   $mchld[$lvl][$jseq]
@matts = ();        # attribute sequence at given level and sequence, i.e.,
$matts[0][0] = "";  #   $matts[$lvl][$jseq]
@nchld = ();        # number of children at given level
$nchld[0] = 0;      #   $nchld[$lvl] = no of children at curr level
@passopen=();       # For math: output for $mlvl passed from start_ to end_
$passopen[0] = "";
$xmlinput = 1;      # Is input an xml document?

###
sgml('start', sub{
    $eltdepth = 0;
    my $os = "";
    $os = $os . "<?xml version=\"1.0\" encoding=\"" . $encoding . "\"?>\n";
    $os = $os . "<!DOCTYPE article\n  SYSTEM "
        . '"http://www.albany.edu/~hammond/gellmu/xml/uxgellmu.dtd">' . "\n";
    $os = $os . "<?gellmu  This auxiliary file is not for distribution?>\n";
    output($os);
});

###
sgml('end', sub{
    if (!($eltdepth == 0)){
        print STDERR "WARNING: Coding error in sgmlspl file: ", $WhoAmI,
        "\n    eltdepth not balanced; ending with value ", $eltdepth, ".\n";
    };
    output("\n");
    print STDERR "  No. of elts.: ", $eltser,
                 "; max elt. depth ", $maxeltdepth, "\n";
});

###
sgml('start_element', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    my ($elt, $event) = @_;
    my $name = $elt->name;
    my $oname = $name;
    my $os = "";
    my $plvl = 0;
    my $pseq = 0;
    if($name =~ /^(math|tmath|displaymath|equation|eqnarray)$/){
        $gmath = 1;
    };
    if($gmath > 0){
        $plvl = $mlvl;                 # level of parent for indexing
                                       #    information about this elt
        $mseq[$plvl]++;                # seq of this elt in its parent
        $pseq = $mseq[$plvl];
        $mchld[$plvl][$pseq] = $name;  # enter name as record for parent
        $mlvl++;                       # set level of curr elt
        $mseq[$mlvl] = 0;              # zero seq for children
    };
    my $open= "";
    my $osatt = "";
    my $clcm = "\n>";
    my $clc = $clcm;
    my @attn = ();
    my @atty = ();
    my @attv = ();
    my $jj = 0;
    my $nat = get_attributes($elt, \@attn, \@atty, \@attv);
    if($nat > 0){
        $jj = 0;
        while($jj < $nat){
            if(($atty[$jj] =~ /CDATA|IMPLIED|TOKEN/ ) && ($attv[$jj] ne "")){
                if(!(($gmath == 0) &&
                     (($attn[$jj] eq "mml") || ($attn[$jj] eq "mtype")))){
                    $osatt = $osatt . " " . $attn[$jj] 
                                    . '="' . $attv[$jj] . '"';
                };
            };
            $jj++;
        };
    };
    if($elt->defempty){
        if( ($name eq "nul") && ($pan eq "mathsym") ){
            return;
        };
        $clcm = "\n/>";
        $clc = $clcm;
	$os = "<" . $oname . $osatt . $clc;
	if($gmath > 0){
	    if(($elt->within("mbox") eq "")&&($elt->within("text") eq "")&&
	       ($elt->within("label") eq "")&&($elt->within("klabel") eq "")&&
               ($elt->within("eqntag") eq "")&&($elt->within("tag") eq "")){
		$os = "<Em0>" . $os . "</Em0>";
	    };
	    $mlvl--;
	};
	output($os);
    }
    elsif($gmath > 0){
        $passopen[$mlvl] = $osatt;
	push_output('string');
    }
    else{
	$os = "<" . $oname . $osatt . $clc;
        output($os);
    };
});

###
sgml('end_element', sub{
    $eltdepth--;
    my ($elt, $event) = @_;
    if($elt->defempty){
        return;
    };
    my $name = $elt->name;
    my $oname = $name;
    my $pan = "";
    my $pos = "";
    my $harvest = "";
    my $os = "";
    my $osatt = "";
    my $open = "";
    my $close = "";  # Output string beginning with closetag
    my $clcm = "\n>";
    my $jseq = 0;
    my @jchld = ();
    my $jj = 0;
    my $plvl = 0;
    my $pseq = 0;
    if($gmath > 0){
	$pos = pop_output;
        $osatt = $osatt . $passopen[$mlvl];
	$open = $open . "<" . $oname;
        $jseq = $mseq[$mlvl];     # no of children of curr elt
        $nchld[$mlvl] = $jseq;
        $open = $open . $osatt . $clcm;
	$harvest = $pos;
        $os = $os . $open . $harvest;
        $close = "</" . $oname . $clcm;
        $os = $os . $close;
        $mlvl--;                  # back to level of parent
        $plvl = $mlvl;
        $pseq = $mseq[$plvl];
	output($os);
        if($name =~ /^(math|tmath|displaymath|equation|eqnarray)$/){
            $gmath = 0;
            if($mlvl != 0){
                print STDERR $WhoAmI,
                " -- WARNING: Variable mlvl unbalanced in ", $name ,
                " value: ", $mlvl, "\n";
            };
        };
    }
    else{
        $close = "</" . $oname . $clcm;
        $os = $os . $close;
        output($os);
    };
});

###
sgml('cdata', sub{
    my $os = "";
    my $ds = $_[0];
    my $pds = "";
    my $len = 0;
    my @iar = ();
    my $iarl = 0;
    my @oar = ();
    my $oarl = 0;
    my @tar = ();
    my $tarl = 0;
    my $tys = "";
    my $jj = 0;
    my $kk = 0;
    my $ll = 0;
    my $isnum = 0;
    my @types=();
    my $parseq = 0;
    my $eltname = "";
#  Now split CDATA inside math zones into atomic units for math parsing
#  "Cd0" is the catchall; attribute "type" with values "number", "letter",
#  "character", or "text".
    if($mlvl > 0){
	$parseq = $mseq[$mlvl-1]; # seq. no. of curr. elt. in its parent
	$eltname = $mchld[$mlvl-1][$parseq];
	$ev = $_[1];
	$elt = $ev->element;
	if(!($elt->name eq $eltname)){
	    print STDERR $WhoAmI, " -- WARNING: element name misfiled\n";
	};
        # First: do case when a single Cd0 will be used.
        # These are text strings
	my $txtcds = '^(' . 'func|mbox|label|klabel|text|arrcols|arrpos'
	    . '|tabarg|tabopt|eqnkey|eqnser|eqntag|tag|csep' . ')$';
	if($eltname eq "text"){
	    if($ds =~ /^\s+/){
		$ds =~ s/^\s+//;
		$os = $os . "<spc/>";
            };
	    if($ds =~ /\s+$/){
		$ds =~ s/\s+$//;
		$pds = "<spc/>";
	    };
	};
	if($eltname =~ /$txtcds/){
	    $os = $os . '<Cd0 type="text">' . $ds . '</Cd0>';
	    $os = $os . $pds;
	    output($os);
	    return;
        }
	elsif(!($elt->within("mbox") eq "")||!($elt->within("text") eq "")||
	      !($elt->within("label") eq "")||!($elt->within("klabel") eq "")||              !($elt->within("eqntag") eq "")||!($elt->within("tag") eq "")){
	    $os = $os . $ds;
	    output($os);
	    return;
	};
        # Look for numbers, possibly with decimals; use '<' as temp separator
        $ds =~ s/((\.\d+)|(\d+\.?\d*))/<\1</g;
	@tar = split('<', $ds);
	$tarl = scalar(@tar);
	$ll = 0;
	while($ll < $tarl){
	    if($tar[$ll] =~ /^((\.\d+)|(\d+\.?\d*))$/){
		$os = $os . '<Cd0 type="number">' . $tar[$ll] . "</Cd0\n>";
	    }
	    else{
		@iar = split('', $tar[$ll]);
		$iarl = scalar(@iar);
		$jj = 0;
		while($jj < $iarl){
		    if($iar[$jj] eq '('){
			$os = $os . '<bal>';
		    }
		    elsif($iar[$jj] eq ')'){
			$os = $os . "</bal\n>";
		    }
		    elsif($iar[$jj] eq '['){
			$os = $os . '<balsb>';
		    }
		    elsif($iar[$jj] eq ']'){
			$os = $os . '</balsb>';
		    }
		    elsif($iar[$jj] eq '+'){
			$os = $os . "<plus/>";
		    }
		    elsif($iar[$jj] eq '-'){
			$os = $os . "<minus/>";
		    }
		    elsif($iar[$jj] =~ /^[[:alpha:]]$/){
			$os = $os . '<Cd0 type="letter">' . $iar[$jj]
			    . "</Cd0\n>";
		    }
		    elsif($iar[$jj] =~ '\s'){
			$os = $os . "";
		    }
		    else{
			$os = $os . '<Cd0 type="character">' . $iar[$jj]
			    . "</Cd0\n>";
		    };
		    $jj++;
		};
	    };
	    $ll++;
	};
	output($os);
    }
    else{
        $os = $_[0];
        output($os);
    };
});

###
sgml('pi',sub{
    my $inst = $_[0];
    my $os = "";
    $inst =~ s/\\n\\012/ /g ;
    if($inst =~ /^xml /){
        $xmlinput = 1;   # set above anyway
    }
    else{
        $os = "<?" . $inst . "?>\n";
    };
    output($os);
});

###
sgml('re', sub{
    return;
});

###
sub get_attributes {
    my ($elt, $rattn, $ratty, $rattv) = @_;
    # $rattn, $rattv are references to passed arrays
    my $attref = $elt->attributes;
    my %atts = %$attref;
    my @atns = keys(%atts);
    my $nat = scalar @atns;
    my $jj = 0;
    my $jty = "";
    my $jvs = "";
    while($jj < $nat){
        $$rattn[$jj] = $atns[$jj];
        $jty = $atts{$atns[$jj]}->type;
        $$ratty[$jj] = $jty;
        if($jty eq "CDATA"){
            $jvs = $atts{$atns[$jj]}->value;
        }
        elsif($jty eq "TOKEN"){
            $jvs = $atts{$atns[$jj]}->value;
        }
        elsif($jty eq "IMPLIED"){
            $jvs = $atts{$atns[$jj]}->value;
        }
        elsif(($jty eq "NOTATION")||($jty eq "ENTITY")){
            $jvs = "WARNING: attribute type " . $jty
                   . " not supported by this code";
        }
        else{
            $jvs = "WARNING: ATTRIBUTE TYPE ERROR: " . $jty;
        }
        $$rattv[$jj] = $jvs;
        $jj++;
    };
    return $nat;
};

1;
