c
c   This file contains a very simple program for the 2-d wave equation on a
c   square, using a simple Leapfrog scheme.  This is intended as a simple
c   example.  As an option, it can be setup to generate graphical output.
c   Because this is a Fortran example, it takes input from standard input
c   (the console or whatever) rather than the command line.  
c
c     int mdim,          /* size of array in this dimension */
c     int is_parallel,   /* true if the array is parallelized in
c                           this dimension */
c     int start,         /* starting index for local piece */
c     int end,           /* ending index for local piece */
c     int loc            /* location in this dimension of the 
c                           processor */
c     int ndim           /* number of processors in this dimension,
c 	                   -1 if unspecified */
c     int sg             /* start and end ghost limits */
c     int eg             /* start and end ghost limits */
c     int nsg            /* start and end ghost limits for neighbor */
c     int neg
c
       integer function worker()
       integer mx, my, nd
       parameter(nd=2, NBYTES=8)
       integer maxnx, maxny
       parameter(maxnx = 258, maxny = 258 )
c      Fields in the "SZ" array 
       include '../meshf.h'
       integer pimytid, pinumtids
       integer pgm, sz(0:9,0:nd-1)
       integer nstep, nx, ny
       integer sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       double precision a(maxnx*maxny), b(maxnx*maxny),
     *                  c(maxnx*maxny)
       integer myid, nproc
       double  precision t1, t2, SYGetElapsedTime
c
c Define the domain as a 2-d mesh of size mx x my, to be subdivided 
c in both dimensions
c
       mx    = 256
       my    = 256
       nstep = 10
c
c This needs code to read from one and distribute to all.
c      if (PImytid() .eq. 0) then
c       print *, 'Mesh points in each direction for global problem'
c       read  *, mx
c       my = mx
c       print *, 'Number of time cycles (3 steps/cycle)'
c       read *, nstep
c       call PIgscatter( ... )
c      else
c       call PIgscatter( ... )
c      endif
c
       sz(szmdim,0)       = mx
       sz(szisparallel,0) =  1
       sz(szndim,0)       = -1
       sz(szmdim,1)       = my
       sz(szisparallel,1) =  1
       sz(szndim,1)       = -1
c
c      Build the communications pattern by:
c 
c      1. Compute the size of the ghost-points from the computational 
c         stencil
c      2. Compute the local part of the array that this processor 
c         is responsible for
c      3. Build the communication pattern and "compile" it
c
c      Setup the ghost points from the stencil 
c      Note that this assumes doublely periodic boundary conditions.
       call BCFindGhostFromStencil( nd, sz, 0, 0, boxstencil )

       myid  = pimytid()
       nproc = pinumtids()
       call BCGlobalToLocalArray( nd, sz, nproc, myid )
c
       pgm = BCBuildArrayPGM( nd, sz, nproc, myid, NBYTES )
       call BCUseOrderedSend( pgm )
       call BCArrayCompile( pgm, 0 )
c
c      Compute the parameters of our part of the domain
c
       sx   = sz(szstart,0) + 1
       ex   = sz(szend,0) + 1
       sxgp = sz(szsg,0)
       exgp = sz(szeg,0)
       nx   = ex - sx + 1 + sxgp + exgp
       sy   = sz(szstart,1) + 1
       ey   = sz(szend,1) + 1
       sygp = sz(szsg,1)
       eygp = sz(szeg,1)
       ny   = ey - sy + 1 + sygp + eygp
c
c      Sanity check on subproblem size
       if (ny * nx .gt. maxnx * maxny) then
	   print *, 'Selected domain too large '
	   worker = 1
	   return
       endif
c
c      initialize a and b
       call InitDomain( a, b, mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
c
       call PIgsync(0)
       t1 = SYGetElapsedTime()
c
c      To simplify the code and to keep from having to copy a to b etc, we
c      do 3 timesteps per loop
       call BCexec( pgm, a, a )
       do i = 0, nstep-1, 3
          call BCexec( pgm, b, b )
c         Optionally display once every three time-steps with 16 colors
c         (comment out call to dispmesh to suppres graphics).  Do this here
c         because display NEEDS ghost-point values
c          call dispmesh( b, sx, sxgp, ex, exgp, sy, sygp, ey, eygp, 
c     *                   mx, my, 16 )
          call TimeStep( a, b, c, 
     *                   mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp )
          call BCexec( pgm, c, c )
          call TimeStep( b, c, a, 
     *                   mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp )
          call BCexec( pgm, a, a )
          call TimeStep( c, a, b, 
     *                   mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp )
       enddo
       t2  = SYGetElapsedTime() - t1
c      ops = points_in_grid * steps * ops/point
       ops = mx * my * i * 9
       print *, ops / (1.0e6 * t2), ' Megaflops'
       print *, 'Total time = ', t2, ' on ', pimytid()
c
       call BCfree( pgm )
c
       worker = 0
       return
       end
c
       subroutine InitDomain( a, b, mx,my,
     +                           sx,sxgp,ex,exgp,sy,sygp,ey,eygp)
       integer      mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       double precision a(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       double precision b(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       double precision hx, hy
c
c     Zero everything including ghost points
c
       do j = sy-sygp,ey+eygp
          do i = sx-sxgp,ex+exgp
             a(i,j) = 0.0d0
	     b(i,j) = 0.0d0
          enddo
       enddo
c
c ---  initialize the interior as a simple gaussian.  Need to modify this
c      to be a single expanding wave.
c
       hx = 1.0 / (mx - 1)
       hy = 1.0 / (my - 1)
       do j = sy, ey
          do i = sx, ex
             a(i,j) = exp( -( (i*hx - 0.5)**2 + (j*hy - 0.5)**2 ) )
	     b(i,j) = exp( -( (i*hx - 0.5)**2 + (j*hy - 0.5)**2 ) )
          enddo
       enddo
       return
       end
c
       subroutine TimeStep( a, b, c, 
     *                  mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp )
       integer      mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       double precision a(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       double precision b(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       double precision c(sx-sxgp:ex+exgp,sy-sygp:ey+eygp)
       integer i, j
c     
c      Code to time step the problem using Leapfrog, using a CFL # of 0.8
c      This assumes a fully periodic domain.
       do 10 j=sy,ey
          do 10 i=sx,ex
             c(i,j) = 2.0d0 * b(i,j) - a(i,j) + 0.8 * 
     *               (b(i+1,j) - 4.0 * b(i,j) + b(i-1,j) + 
     *                b(i,j+1)                + b(i,j-1))
 10    continue
       return
       end
