/* motion.c, motion estimation                                              */

/* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose.  In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders.  Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
 *
 */

#include <stdio.h>
#include "config.h"
#include "global.h"

/* private prototypes */

static void frame_estimate _ANSI_ARGS_((unsigned char *org,
  unsigned char *ref, unsigned char *mb,
  int i, int j,
  int sx, int sy, int *iminp, int *jminp, int *imintp, int *jmintp,
  int *iminbp, int *jminbp, int *dframep, int *dfieldp,
  int *tselp, int *bselp));

static int fullsearch _ANSI_ARGS_((unsigned char *org, unsigned char *ref,
  unsigned char *blk,
  int lx, int i0, int j0, int sx, int sy, int h, int xmax, int ymax,
  int *iminp, int *jminp));

static int dist1 _ANSI_ARGS_((unsigned char *blk1, unsigned char *blk2,
  int lx, int hx, int hy, int h));

static int dist2 _ANSI_ARGS_((unsigned char *blk1, unsigned char *blk2,
  int lx, int hx, int hy, int h));

static int bdist1 _ANSI_ARGS_((unsigned char *pf, unsigned char *pb,
  unsigned char *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h));

static int bdist2 _ANSI_ARGS_((unsigned char *pf, unsigned char *pb,
  unsigned char *p2, int lx, int hxf, int hyf, int hxb, int hyb, int h));

static int variance _ANSI_ARGS_((unsigned char *p, int lx));

/*
 * motion estimation for progressive and interlaced frame pictures
 *
 * oldorg: source frame for forward prediction (P and B frames)
 * neworg: source frame for backward prediction (B frames only)
 * oldref: reconstructed frame for forward prediction (P and B frames)
 * newref: reconstructed frame for backward prediction (B frames only)
 * cur:    current frame (the one for which the prediction is formed)
 * sxf,syf: forward search window (frame coordinates)
 * sxb,syb: backward search window (frame coordinates)
 * mbi:    pointer to macroblock info structure
 *
 * results:
 * mbi->
 *  mb_type: 0, MB_INTRA, MB_FORWARD, MB_BACKWARD, MB_FORWARD|MB_BACKWARD
 *  MV[][][]: motion vectors (frame format)
 *  mv_field_sel: top/bottom field (for field prediction)
 *  motion_type: MC_FRAME, MC_FIELD
 *
 * uses global vars: pict_type, frame_pred_dct
 */
void motion_estimation(oldorg,neworg,oldref,newref,cur,sxf,syf,sxb,syb,mbi)
unsigned char *oldorg,*neworg,*oldref,*newref,*cur;
int sxf,syf,sxb,syb;
struct mbinfo *mbi;
{
  int i,j;
  int imin,jmin,iminf,jminf,iminr,jminr;
  int imint,jmint,iminb,jminb;
  int imintf,jmintf,iminbf,jminbf;
  int imintr,jmintr,iminbr,jminbr;
  int var,v0;
  int dmc,dmcf,dmcr,dmci,vmc,vmcf,vmcr,vmci;
  int dmcfield,dmcfieldf,dmcfieldr,dmcfieldi;
  int tsel,bsel,tself,bself,tselr,bselr;
  unsigned char *mb;

  for (j=0; j<height; j+=16)
  {
    for (i=0; i<width; i+=16)
    {
      mb = cur+i+width*j;
      if (pict_type==I_TYPE)
        mbi->mb_type = MB_INTRA;
      else if (pict_type==P_TYPE)
      {
        var = variance(mb,width);
        if (frame_pred_dct)
        {
          dmc = fullsearch(oldorg,oldref,mb,
                           width,i,j,sxf,syf,16,width,height,&imin,&jmin);
          vmc = dist2(oldref+(imin>>1)+width*(jmin>>1),mb,
                      width,imin&1,jmin&1,16);
          mbi->motion_type = MC_FRAME;
        }
        else
        {
          frame_estimate(oldorg,oldref,mb,i,j,sxf,syf,
            &imin,&jmin,&imint,&jmint,&iminb,&jminb,
            &dmc,&dmcfield,&tsel,&bsel);

          /* select between frame and field prediction */
          if (dmc<=dmcfield)
          {
            mbi->motion_type = MC_FRAME;
            vmc = dist2(oldref+(imin>>1)+width*(jmin>>1),mb,
                        width,imin&1,jmin&1,16);
          }
          else
          {
            mbi->motion_type = MC_FIELD;
            dmc = dmcfield;
            vmc = dist2(oldref+(tsel?width:0)+(imint>>1)+(width<<1)*(jmint>>1),
                        mb,width<<1,imint&1,jmint&1,8);
            vmc+= dist2(oldref+(bsel?width:0)+(iminb>>1)+(width<<1)*(jminb>>1),
                        mb+width,width<<1,iminb&1,jminb&1,8);
          }
        }
        if (vmc>var && vmc>=9*256)
          mbi->mb_type = MB_INTRA;
        else
        {
          v0 = dist2(oldref+i+width*j,mb,width,0,0,16);
          if (4*v0>5*vmc && v0>=9*256)
          {
            var = vmc;
            mbi->mb_type = MB_FORWARD;
            if (mbi->motion_type==MC_FRAME)
            {
              mbi->MV[0][0][0] = imin - (i<<1);
              mbi->MV[0][0][1] = jmin - (j<<1);
            }
            else
            {
              /* these are FRAME vectors */
              mbi->MV[0][0][0] = imint - (i<<1);
              mbi->MV[0][0][1] = (jmint<<1) - (j<<1);
              mbi->MV[1][0][0] = iminb - (i<<1);
              mbi->MV[1][0][1] = (jminb<<1) - (j<<1);
              mbi->mv_field_sel[0][0] = tsel;
              mbi->mv_field_sel[1][0] = bsel;
            }
          }
          else
          {
            var = v0;
            mbi->mb_type = 0;
            mbi->motion_type = MC_FRAME; /* probably redundant */
            mbi->MV[0][0][0] = 0;
            mbi->MV[0][0][1] = 0;
          }
        }
      }
      else /* if (pict_type==B_TYPE) */
      {
        var = variance(mb,width);
        if (frame_pred_dct)
        {
          /* forward */
          dmcf = fullsearch(oldorg,oldref,mb,
                            width,i,j,sxf,syf,16,width,height,&iminf,&jminf);
          vmcf = dist2(oldref+(iminf>>1)+width*(jminf>>1),mb,
                       width,iminf&1,jminf&1,16);

          /* backward */
          dmcr = fullsearch(neworg,newref,mb,
                            width,i,j,sxb,syb,16,width,height,&iminr,&jminr);
          vmcr = dist2(newref+(iminr>>1)+width*(jminr>>1),mb,
                       width,iminr&1,jminr&1,16);

          /* interpolated (bidirectional) */
          vmci = bdist2(oldref+(iminf>>1)+width*(jminf>>1),
                        newref+(iminr>>1)+width*(jminr>>1),
                        mb,width,iminf&1,jminf&1,iminr&1,jminr&1,16);

          /* decisions */
          if (vmcf<=vmcr && vmcf<=vmci)
          {
            vmc = vmcf;
            mbi->mb_type = MB_FORWARD;
          }
          else if (vmcr<=vmci)
          {
            vmc = vmcr;
            mbi->mb_type = MB_BACKWARD;
          }
          else
          {
            vmc = vmci;
            mbi->mb_type = MB_FORWARD|MB_BACKWARD;
          }

          mbi->motion_type = MC_FRAME;
        }
        else
        {
          /* forward prediction */
          frame_estimate(oldorg,oldref,mb,i,j,sxf,syf,
            &iminf,&jminf,&imintf,&jmintf,&iminbf,&jminbf,
            &dmcf,&dmcfieldf,&tself,&bself);

          /* backward prediction */
          frame_estimate(neworg,newref,mb,i,j,sxb,syb,
            &iminr,&jminr,&imintr,&jmintr,&iminbr,&jminbr,
            &dmcr,&dmcfieldr,&tselr,&bselr);

          /* calculate interpolated distance */
          /* frame */
          dmci = bdist1(oldref+(iminf>>1)+width*(jminf>>1),
                        newref+(iminr>>1)+width*(jminr>>1),
                        mb,width,iminf&1,jminf&1,iminr&1,jminr&1,16);

          /* top field */
          dmcfieldi = bdist1(oldref+(imintf>>1)+(tself?width:0)+(width<<1)*(jmintf>>1),
                             newref+(imintr>>1)+(tselr?width:0)+(width<<1)*(jmintr>>1),
                             mb,width<<1,imintf&1,jmintf&1,imintr&1,jmintr&1,8);

          /* bottom field */
          dmcfieldi+= bdist1(oldref+(iminbf>>1)+(bself?width:0)+(width<<1)*(jminbf>>1),
                             newref+(iminbr>>1)+(bselr?width:0)+(width<<1)*(jminbr>>1),
                             mb+width,width<<1,iminbf&1,jminbf&1,iminbr&1,jminbr&1,8);

          /* select prediction type of minimum distance */
          if (dmci<dmcfieldi && dmci<dmcf && dmci<dmcfieldf
              && dmci<dmcr && dmci<dmcfieldr)
          {
            /* frame, interpolated */
            mbi->mb_type = MB_FORWARD|MB_BACKWARD;
            mbi->motion_type = MC_FRAME;
            vmc = bdist2(oldref+(iminf>>1)+width*(jminf>>1),
                         newref+(iminr>>1)+width*(jminr>>1),
                         mb,width,iminf&1,jminf&1,iminr&1,jminr&1,16);
          }
          else if (dmcfieldi<dmcf && dmcfieldi<dmcfieldf
                   && dmcfieldi<dmcr && dmcfieldi<dmcfieldr)
          {
            /* field, interpolated */
            mbi->mb_type = MB_FORWARD|MB_BACKWARD;
            mbi->motion_type = MC_FIELD;
            vmc = bdist2(oldref+(imintf>>1)+(tself?width:0)+(width<<1)*(jmintf>>1),
                         newref+(imintr>>1)+(tselr?width:0)+(width<<1)*(jmintr>>1),
                         mb,width<<1,imintf&1,jmintf&1,imintr&1,jmintr&1,8);
            vmc+= bdist2(oldref+(iminbf>>1)+(bself?width:0)+(width<<1)*(jminbf>>1),
                         newref+(iminbr>>1)+(bselr?width:0)+(width<<1)*(jminbr>>1),
                         mb+width,width<<1,iminbf&1,jminbf&1,iminbr&1,jminbr&1,8);
          }
          else if (dmcf<dmcfieldf && dmcf<dmcr && dmcf<dmcfieldr)
          {
            /* frame, forward */
            mbi->mb_type = MB_FORWARD;
            mbi->motion_type = MC_FRAME;
            vmc = dist2(oldref+(iminf>>1)+width*(jminf>>1),mb,
                        width,iminf&1,jminf&1,16);
          }
          else if (dmcfieldf<dmcr && dmcfieldf<dmcfieldr)
          {
            /* field, forward */
            mbi->mb_type = MB_FORWARD;
            mbi->motion_type = MC_FIELD;
            vmc = dist2(oldref+(tself?width:0)+(imintf>>1)+(width<<1)*(jmintf>>1),
                        mb,width<<1,imintf&1,jmintf&1,8);
            vmc+= dist2(oldref+(bself?width:0)+(iminbf>>1)+(width<<1)*(jminbf>>1),
                        mb+width,width<<1,iminbf&1,jminbf&1,8);
          }
          else if (dmcr<dmcfieldr)
          {
            /* frame, backward */
            mbi->mb_type = MB_BACKWARD;
            mbi->motion_type = MC_FRAME;
            vmc = dist2(newref+(iminr>>1)+width*(jminr>>1),mb,
                        width,iminr&1,jminr&1,16);
          }
          else
          {
            /* field, backward */
            mbi->mb_type = MB_BACKWARD;
            mbi->motion_type = MC_FIELD;
            vmc = dist2(newref+(tselr?width:0)+(imintr>>1)+(width<<1)*(jmintr>>1),
                        mb,width<<1,imintr&1,jmintr&1,8);
            vmc+= dist2(newref+(bselr?width:0)+(iminbr>>1)+(width<<1)*(jminbr>>1),
                        mb+width,width<<1,iminbr&1,jminbr&1,8);
          }
        }

        if (vmc>var && vmc>=9*256)
          mbi->mb_type = MB_INTRA;
        else
        {
          var = vmc;
          if (mbi->motion_type==MC_FRAME)
          {
            mbi->MV[0][0][0] = iminf - (i<<1);
            mbi->MV[0][0][1] = jminf - (j<<1);
            mbi->MV[0][1][0] = iminr - (i<<1);
            mbi->MV[0][1][1] = jminr - (j<<1);
          }
          else
          {
            /* these are FRAME vectors */
            mbi->MV[0][0][0] = imintf - (i<<1);
            mbi->MV[0][0][1] = (jmintf<<1) - (j<<1);
            mbi->MV[1][0][0] = iminbf - (i<<1);
            mbi->MV[1][0][1] = (jminbf<<1) - (j<<1);
            mbi->mv_field_sel[0][0] = tself;
            mbi->mv_field_sel[1][0] = bself;
            mbi->MV[0][1][0] = imintr - (i<<1);
            mbi->MV[0][1][1] = (jmintr<<1) - (j<<1);
            mbi->MV[1][1][0] = iminbr - (i<<1);
            mbi->MV[1][1][1] = (jminbr<<1) - (j<<1);
            mbi->mv_field_sel[0][1] = tselr;
            mbi->mv_field_sel[1][1] = bselr;
          }
        }
      }

      mbi->var = var;
      mbi++;
    }

    if (!quiet)
    {
      putc('.',stderr);
      fflush(stderr);
    }
  }
  if (!quiet)
    putc('\n',stderr);
}

/*
 * frame picture motion estimation
 *
 * org: top left pel of source reference frame
 * ref: top left pel of reconstructed reference frame
 * mb:  macroblock to be matched
 * i,j: location of mb relative to ref (=center of search window)
 * sx,sy: half widths of search window
 * iminp,jminp,dframep: location and value of best frame prediction
 * imintp,jmintp,tselp: location of best field pred. for top field of mb
 * iminbp,jminbp,bselp: location of best field pred. for bottom field of mb
 * dfieldp: value of field prediction
 */
static void frame_estimate(org,ref,mb,i,j,sx,sy,
  iminp,jminp,imintp,jmintp,iminbp,jminbp,dframep,dfieldp,tselp,bselp)
unsigned char *org,*ref,*mb;
int i,j,sx,sy;
int *iminp,*jminp;
int *imintp,*jmintp,*iminbp,*jminbp;
int *dframep,*dfieldp;
int *tselp,*bselp;
{
  int dt,db,dmint,dminb;
  int imint,iminb,jmint,jminb;

  /* frame prediction */
  *dframep = fullsearch(org,ref,mb,width,i,j,sx,sy,16,width,height,
                        iminp,jminp);

  /* predict top field from top field */
  dt = fullsearch(org,ref,mb,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
                  &imint,&jmint);

  /* predict top field from bottom field */
  db = fullsearch(org+width,ref+width,mb,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
                  &iminb,&jminb);

  /* select prediction for top field */
  if (dt<=db)
  {
    dmint=dt; *imintp=imint; *jmintp=jmint; *tselp=0;
  }
  else
  {
    dmint=db; *imintp=iminb; *jmintp=jminb; *tselp=1;
  }

  /* predict bottom field from top field */
  dt = fullsearch(org,ref,mb+width,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
                  &imint,&jmint);

  /* predict bottom field from bottom field */
  db = fullsearch(org+width,ref+width,mb+width,width<<1,i,j>>1,sx,sy>>1,8,width,height>>1,
                  &iminb,&jminb);

  /* select prediction for bottom field */
  if (db<=dt)
  {
    dminb=db; *iminbp=iminb; *jminbp=jminb; *bselp=1;
  }
  else
  {
    dminb=dt; *iminbp=imint; *jminbp=jmint; *bselp=0;
  }

  *dfieldp=dmint+dminb;
}

/*
 * full search block matching
 *
 * blk: top left pel of (16*h) block
 * h: height of block
 * lx: distance (in bytes) of vertically adjacent pels in ref,blk
 * org: top left pel of source reference picture
 * ref: top left pel of reconstructed reference picture
 * i0,j0: center of search window
 * sx,sy: half widths of search window
 * xmax,ymax: right/bottom limits of search area
 * iminp,jminp: pointers to where the result is stored
 *              result is given as half pel offset from ref(0,0)
 *              i.e. NOT relative to (i0,j0)
 */
static int fullsearch(org,ref,blk,lx,i0,j0,sx,sy,h,xmax,ymax,iminp,jminp)
unsigned char *org,*ref,*blk;
int lx,i0,j0,sx,sy,h,xmax,ymax;
int *iminp,*jminp;
{
  int i,j,imin,jmin,ilow,ihigh,jlow,jhigh;
  int d,dmin;

  dmin=65536;

  ilow = i0 - sx;
  ihigh = i0 + sx;

  if (ilow<0)
    ilow = 0;

  if (ihigh>xmax-16)
    ihigh = xmax-16;

  jlow = j0 - sy;
  jhigh = j0 + sy;

  if (jlow<0)
    jlow = 0;

  if (jhigh>ymax-h)
    jhigh = ymax-h;

  /* full pel */
  for (j=jlow; j<=jhigh; j++)
    for (i=ilow; i<=ihigh; i++)
    {
      d = dist1(org+i+lx*j,blk,lx,0,0,h);

      if (d<dmin)
      {
        dmin = d;
        imin = i;
        jmin = j;
      }
    }

  /* half pel */
  dmin = 65536;
  imin <<= 1;
  jmin <<= 1;
  ilow = imin - (imin>0);
  ihigh = imin + (imin<((xmax-16)<<1));
  jlow = jmin - (jmin>0);
  jhigh = jmin + (jmin<((ymax-h)<<1));

  for (j=jlow; j<=jhigh; j++)
    for (i=ilow; i<=ihigh; i++)
    {
      d = dist1(ref+(i>>1)+lx*(j>>1),blk,lx,i&1,j&1,h);

      if (d<dmin)
      {
        dmin = d;
        imin = i;
        jmin = j;
      }
    }

  *iminp = imin;
  *jminp = jmin;

  return dmin;
}

/*
 * total absolute difference between two (16*h) blocks
 * including optional half pel interpolation of blk1 (hx,hy)
 * blk1,blk2: addresses of top left pels of both blocks
 * lx:        distance (in bytes) of vertically adjacent pels
 * hx,hy:     flags for horizontal and/or vertical interpolation
 * h:         height of block (usually 8 or 16)
 */
static int dist1(blk1,blk2,lx,hx,hy,h)
unsigned char *blk1,*blk2;
int lx,hx,hy,h;
{
  unsigned char *p1,*p1a,*p2;
  int i,j;
  int s,v;

  s = 0;
  p1 = blk1;
  p2 = blk2;

  if (!hx && !hy)
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = p1[i] - p2[i];
        if (v>=0)
          s+= v;
        else
          s-= v;
      }
      p1+= lx;
      p2+= lx;
    }
  else if (hx && !hy)
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = ((unsigned int)(p1[i]+p1[i+1]+1)>>1) - p2[i];
        if (v>=0)
          s+= v;
        else
          s-= v;
      }
      p1+= lx;
      p2+= lx;
    }
  else if (!hx && hy)
  {
    p1a = p1 + lx;
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = ((unsigned int)(p1[i]+p1a[i]+1)>>1) - p2[i];
        if (v>=0)
          s+= v;
        else
          s-= v;
      }
      p1 = p1a;
      p1a+= lx;
      p2+= lx;
    }
  }
  else /* if (hx && hy) */
  {
    p1a = p1 + lx;
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = ((unsigned int)(p1[i]+p1[i+1]+p1a[i]+p1a[i+1]+2)>>2) - p2[i];
        if (v>=0)
          s+= v;
        else
          s-= v;
      }
      p1 = p1a;
      p1a+= lx;
      p2+= lx;
    }
  }

  return s;
}

/*
 * total squared difference between two (16*h) blocks
 * including optional half pel interpolation of blk1 (hx,hy)
 * blk1,blk2: addresses of top left pels of both blocks
 * lx:        distance (in bytes) of vertically adjacent pels
 * hx,hy:     flags for horizontal and/or vertical interpolation
 * h:         height of block (usually 8 or 16)
 */
static int dist2(blk1,blk2,lx,hx,hy,h)
unsigned char *blk1,*blk2;
int lx,hx,hy,h;
{
  unsigned char *p1,*p1a,*p2;
  int i,j;
  int s,v;

  s = 0;
  p1 = blk1;
  p2 = blk2;
  if (!hx && !hy)
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = p1[i] - p2[i];
        s+= v*v;
      }
      p1+= lx;
      p2+= lx;
    }
  else if (hx && !hy)
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = ((unsigned int)(p1[i]+p1[i+1]+1)>>1) - p2[i];
        s+= v*v;
      }
      p1+= lx;
      p2+= lx;
    }
  else if (!hx && hy)
  {
    p1a = p1 + lx;
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = ((unsigned int)(p1[i]+p1a[i]+1)>>1) - p2[i];
        s+= v*v;
      }
      p1 = p1a;
      p1a+= lx;
      p2+= lx;
    }
  }
  else /* if (hx && hy) */
  {
    p1a = p1 + lx;
    for (j=0; j<h; j++)
    {
      for (i=0; i<16; i++)
      {
        v = ((unsigned int)(p1[i]+p1[i+1]+p1a[i]+p1a[i+1]+2)>>2) - p2[i];
        s+= v*v;
      }
      p1 = p1a;
      p1a+= lx;
      p2+= lx;
    }
  }

  return s;
}

/*
 * absolute difference error between a (16*h) block and a bidirectional
 * prediction
 *
 * p2: address of top left pel of block
 * pf,hxf,hyf: address and half pel flags of forward ref. block
 * pb,hxb,hyb: address and half pel flags of backward ref. block
 * h: height of block
 * lx: distance (in bytes) of vertically adjacent pels in p2,pf,pb
 */
static int bdist1(pf,pb,p2,lx,hxf,hyf,hxb,hyb,h)
unsigned char *pf,*pb,*p2;
int lx,hxf,hyf,hxb,hyb,h;
{
  unsigned char *pfa,*pfb,*pfc,*pba,*pbb,*pbc;
  int i,j;
  int s,v;

  pfa = pf + hxf;
  pfb = pf + lx*hyf;
  pfc = pfb + hxf;

  pba = pb + hxb;
  pbb = pb + lx*hyb;
  pbc = pbb + hxb;

  s = 0;

  for (j=0; j<h; j++)
  {
    for (i=0; i<16; i++)
    {
      v = ((((unsigned int)(*pf++ + *pfa++ + *pfb++ + *pfc++ + 2)>>2) +
            ((unsigned int)(*pb++ + *pba++ + *pbb++ + *pbc++ + 2)>>2) + 1)>>1)
           - *p2++;
      if (v>=0)
        s+= v;
      else
        s-= v;
    }
    p2+= lx-16;
    pf+= lx-16;
    pfa+= lx-16;
    pfb+= lx-16;
    pfc+= lx-16;
    pb+= lx-16;
    pba+= lx-16;
    pbb+= lx-16;
    pbc+= lx-16;
  }

  return s;
}

/*
 * squared error between a (16*h) block and a bidirectional
 * prediction
 *
 * p2: address of top left pel of block
 * pf,hxf,hyf: address and half pel flags of forward ref. block
 * pb,hxb,hyb: address and half pel flags of backward ref. block
 * h: height of block
 * lx: distance (in bytes) of vertically adjacent pels in p2,pf,pb
 */
static int bdist2(pf,pb,p2,lx,hxf,hyf,hxb,hyb,h)
unsigned char *pf,*pb,*p2;
int lx,hxf,hyf,hxb,hyb,h;
{
  unsigned char *pfa,*pfb,*pfc,*pba,*pbb,*pbc;
  int i,j;
  int s,v;

  pfa = pf + hxf;
  pfb = pf + lx*hyf;
  pfc = pfb + hxf;

  pba = pb + hxb;
  pbb = pb + lx*hyb;
  pbc = pbb + hxb;

  s = 0;

  for (j=0; j<h; j++)
  {
    for (i=0; i<16; i++)
    {
      v = ((((unsigned int)(*pf++ + *pfa++ + *pfb++ + *pfc++ + 2)>>2) +
            ((unsigned int)(*pb++ + *pba++ + *pbb++ + *pbc++ + 2)>>2) + 1)>>1)
          - *p2++;
      s+=v*v;
    }
    p2+= lx-16;
    pf+= lx-16;
    pfa+= lx-16;
    pfb+= lx-16;
    pfc+= lx-16;
    pb+= lx-16;
    pba+= lx-16;
    pbb+= lx-16;
    pbc+= lx-16;
  }

  return s;
}

/*
 * variance of a (16*16) block, multiplied by 256
 * p:  address of top left pel of block
 * lx: distance (in bytes) of vertically adjacent pels
 */
static int variance(p,lx)
unsigned char *p;
int lx;
{
  int i,j;
  unsigned int v,s,s2;

  s = s2 = 0;

  for (j=0; j<16; j++)
  {
    for (i=0; i<16; i++)
    {
      v = *p++;
      s+= v;
      s2+= v*v;
    }
    p+= lx-16;
  }
  return s2 - (s*s)/256;
}
