#ifndef lint
static char SCCSid[] = "@(#) ./ilu/ilund.c 07/23/93";
#endif

#include "tools.h"
#include "sparse/spmat.h"
#include "sparse/sppriv.h"
#include "inline/spops.h"
#include <math.h>

void SpiNDSort();

/*
  Nickolas S. Jovanovic suggests these options:
  
  Compare to column instead of row
  Use Max norm instead of 2-norm
  Compare to diagonal (on row or column) instead of norm
  
  "After adding the above options, I have other strategies to also try to
  incorporate which are more complicated, such as the local matrix inverse
  idea of Benson & Frederickson as implemented by Vavasis, and maybe a
  version of the fast decomposition idea of Rezayat."

 */

/*@
   SpComputeILUND - Compute ILU factor, using a numeric drop tolerance, for
                      matrix B in BB (descriptors allocated but
                      not set). BB should have been obtained previously
                      by SpCreateSplit(). 

   Input Parameters:
.  B    - matrix being factored
.  BB   - matrix to contain factor
.  rtol - relative tolerance for fill
.  atol - absolute tolerance for fill.
.  fill - maximum number of added elements per row for each of L and U.  

   Note:
   This routine computes the factor simultaneously with computing the
   factor, since the factor is what determines which elements are kept.

   This routine uses a fairly simple-minded algorithm that is O(n^2)
   rather than O(nz).

   The code here owes some to the ilut code of Freund and Nachtigal.
   However, there are a number of changes (beyond just the change in 
   datastructure).  In particular, the sort routine is entirely different
   and the code to determine the elements of L is different if the 
   array is not sorted (F&N's code stops collecting elements of L when
   the first small element is reached, regardless of whether the entries
   are sorted).
 @*/
int SpComputeILUND( B, BB, rtol, atol, fill )
SpMat       *B;
SpMatSplit  *BB;
double      rtol, atol;
int         fill;
{
int     prow, row, *xi, nz, nzi, n, nnz, nzf, 
        *nzl, err, *itmp, i, mincol, col, *icol, lcnt, j, cnt, jj, lenl, lenu,
        *ival, ucnt, maxcol;
double  *rtmp, *dn, sum, *rval, *rcol, *xv, multiplier, *pc, dtmp, avg;
SpMat   *BBf = BB->factor;
SpVec   *pivot_row, *elim_row, **rs, **frs;
int     *r, *ic;
SpRowMat *R = (SpRowMat *)B->data, *RR = (SpRowMat *)BBf->data;

/* Allocate temporaries:
   rtmp - row temp, compute the factored row here (using the previous factored
          terms)
   rcol,icol - contains that part of rtmp that was selected
   dn   - dn[0:prow-1] is the 1 norm of the rows of U; dn[prow] is the 1 norm
          of the row of A.
   rval,ival - contains the values of the filled row and the locations
 */
TRPUSH(SPTRID + 9);
n    = B->rows;
rtmp = (double *)MALLOC( 4 * n * sizeof(double) ); CHKPTRV(rtmp,1);
itmp = (int *)   MALLOC( 4 * n * sizeof(int) );    CHKPTRV(itmp,1);
dn   = rtmp + n;
rval = dn   + n;
rcol = rval + n;
ival = itmp + n;
icol = ival + n;
for (i=0; i<n; i++) rtmp[i] = 0.0;

/* Handle maps */
if (BBf->map) {
    r  = BBf->map->rowmap;
    ic = BBf->map->icolmap;
    }
else {
    r = ic = (int *)0;
    }
BB->nzl[0] = 0;
rs         = R->rs;
frs        = RR->rs;
nzl        = BB->nzl;
for (prow=0; prow<n; prow++) {
    /* Update row "prow" using all rows that are needed to eliminate
       entries on this row.  */

    /* First, unpack the row into a dense vector.  Also compute the
       1-norm of the row and the number of sub- and super-diagonal
       elements, as well as the maximum number of elements to permit */
    elim_row  = rs[r ? r[prow] : prow];
    xi        = elim_row->i;
    xv        = elim_row->v;
    nz        = elim_row->nz;
    sum       = 0.0;
    lenl      = 0;
    if (ic) {
	mincol    = n+1;
	maxcol    = -1;
	while (nz--) {
	    col       = ic[*xi++];
	    if (col < mincol) mincol = col;
	    if (col > maxcol) maxcol = col;
	    sum       += fabs(*xv);
	    rtmp[col] = *xv++;
	    if (col < prow) 
		lenl++;
	    }
	}
    else {
	mincol = xi[0];
	maxcol = xi[nz-1];
	while (nz--) {
	    sum       += fabs(*xv);
	    col       = *xi++;
	    rtmp[col] = *xv++;
	    if (col < prow) 
		lenl++;
	    }
	}
    lenu     = elim_row->nz - lenl;
    lenu     += fill;
    lenl     += fill;
    dn[prow] = sum;
    avg      = rtol * sum / elim_row->nz;
    
    /* Eliminate into rtmp */
    row = mincol;
    while (row < prow) {
	pc        = rtmp+row;
	if (*pc != 0.0) {
	    pivot_row  = frs[row];
	    nz         = nzl[row];
	    /* Get the pivot row */
	    xi         = pivot_row->i + nz;
	    xv         = pivot_row->v + nz;
	    if (*xi++ != row) 
		{SETERRC(-row-1,"Zero pivot encountered"); 
		 TRPOP;
                 return -(row+1);}  /* zero (missing) pivot */
	    /* Do the elimination in place */
	    multiplier = *pc * *xv++;
	    *pc        = multiplier;
	    nnz        = pivot_row->nz - nz - 1;
	    if (xi[nnz-1] > maxcol) maxcol = xi[nnz-1];
	    SPARSEDENSESMAXPY(rtmp,multiplier,xv,xi,nnz);
	    }
	row++;
	};

    /* Compute what of the row we'll keep */
    /* Lower triangle first, using abs(l(i,j)) * onenorm(u(j,:)) */
    lcnt = 0;
    for (j=mincol; j<prow; j++) {
	if (rtmp[j] != 0.0) {
	    rval[j]      = fabs(rtmp[j]) * dn[j];
	    ival[lcnt++] = j;
	    }
	}
    if (lcnt > lenl) {
	SpiNDSort( lcnt, rval, ival, 0, lcnt-1 );
	lcnt = lenl;
	}

    /* Select the topmost values */
    dtmp = rtol * dn[prow];
    cnt  = 0;
    nzi  = 0;
    while (cnt < lcnt) {
	jj = ival[cnt++];
	/* This is the wrong test; should probably be tol*dn[jj] */
	if (rval[jj] < dtmp) {
	    /* Note that if the values are not sorted, we can't 
	       just break out of the loop here */
	    continue;
	    }
	icol[nzi]   = jj;
	rcol[nzi++] = rtmp[jj];
	}
    /* Save the number of non-zeros in the lower triangle */
    nzl[prow] = nzi;

    /* ALWAYS include the diagonal element; update it if it is too small */
    /* If the value is less than the tolerance time the 1-norm of the row, 
       replace the diagonal */
    if (fabs(rtmp[prow]) < avg) 
           rtmp[prow] = (rtmp[prow] < 0.0) ? - avg : avg;
    icol[nzi] = prow;
    rcol[nzi] = 1.0 / rtmp[prow];

    /* A similar approach for the upper triangle, except that no elements
       are dropped for size (just for fill amount) */
    ucnt = 0;
    for (j=prow+1; j<=maxcol; j++) {
	if (rtmp[j] != 0.0) {
	    rval[j]      = fabs(rtmp[j]);
	    ival[ucnt++] = j;
	    }
	}
    if (ucnt > lenu) {
	SpiNDSort( ucnt, rval, ival, 0, ucnt-1 );
	ucnt = lenu;
	}
    /* Select the topmost values */
    cnt  = 0;
    dtmp = 0.0;
    while (cnt < ucnt) {
	jj                = ival[cnt];
	icol[nzi+1+cnt]   = jj;
	rcol[nzi+1+cnt++] = rtmp[jj];
	dtmp             += fabs(rtmp[jj]);
	}
    dn[prow] = dtmp;
    nzf = nzi + 1 + cnt;

    /* Allocate elim_row */
    elim_row = frs[prow];
    SPiMallocNVt(BBf,nzf,&elim_row->v,&elim_row->i,err); CHKERRV(ERR_NO_MEM,1);
    elim_row->maxn = nzf;
    elim_row->nz   = nzf;

    /* Store filled row */
    xi  = elim_row->i;
    xv  = elim_row->v;
    for (i=0; i<nzf; i++) {
	xi[i] = icol[i];
	xv[i] = rcol[i];
	}
    SpSort( xi, xv, nzf );
    for (i=mincol; i<=maxcol; i++) rtmp[i] = 0.0;
    }

/* Recover temp used to find fill */
FREE( rtmp );
FREE( itmp );
/* SpPrint( stdout, BB ); */
TRPOP;
return ERR_NONE;
}

/* 
   internal routine that sorts r and returns the INDICES of the sorted 
   values (r is sparse; we sort r[idx] and permute idx to insure
   that r[idx] is in descending order).
 */
#define SWAP(a,b,c) {c=a;a=b;b=c;}
void SpiNDSort( n, rv, idx, l, r )
int    n, *idx, l, r;
double *rv;
{
int    i, itmp, last;
double dtmp;

if (l >= r) return;
SWAP(idx[l],idx[(l+r)/2],itmp);
last = l;
dtmp = rv[idx[l]];
for (i=l+1; i<=r; i++) {
    if (rv[idx[i]] > dtmp) {
	last++;
	SWAP(idx[last],idx[i],itmp);
	}
    }
SWAP(idx[l],idx[last],itmp);
SpiNDSort( n, rv, idx, l, last - 1 );
SpiNDSort( n, rv, idx, last + 1, r );
}
