/**************************************************************************
*  Image process tool box                                                 *
*     by Yang Yudong                                                      *
*     yangyd@yahoo.com                                                    *
*                                                                         *
***************************************************************************
*  Copyright (C) 1992-1999, Yang Yudong, All rights reserved.             *
*  This file is part of Yang Yudong's image processing software package.  *
*  If you use this software, you agree to the following:                  *
*  This program package is purely experimental, and is licensed "as is".  *
*  Permission is granted to use, modify, and distribute this program      *
*  without charge for any purpose, provided this license/ disclaimer      *
*  notice appears in the copies.  No warranty or maintenance is given,    *
*  either expressed or implied.  In no event shall the author(s) be       *
*  liable to you or a third party for any special, incidental,            *
*  consequential, or other damages, arising out of the use or inability   *
*  to use the program for any purpose (or the loss of data), even if we   *
*  have been advised of such possibilities.  Any public reference or      *
*  advertisement of this source code should refer to it as Yang Yudong's  *
*  orignal.                                                               *
**************************************************************************/
// ****************************************************************
//  Image process tool box
//     by Yang Yudong
//
// File : lut2ds.c
// Description:  Do 2D affine transformation  
// Create Date:  1997. 3. 30 (modified from lut2ds.c & lut2dns.c, and take their places) 
// Modification(date/where): 
//
// ****************************************************************

#define _IMG_LIBBUILD_
#include "image.h"
#include "routines.h"

// bilinear interpolation
void __lut2d(IBYTE *src, int ix, int iy, IBYTE *des, int rx, int ry, long *lutxy, IBYTE fill)
{
  int i, j, x16, y16;
  long *lutx1, *lutx2, *luty1, *luty2;
  int xx, yy, xp, yp, dx, dy;
  IBYTE *pp;
  long p00, p10, p01, p11;
  
  lutx1 = lutxy;     lutx2 = lutxy+rx;  
  luty1 = lutx2+rx;  luty2 = luty1+ry;
  x16 = (ix-1)<<10;       y16 = (iy-1)<<10;
  for(j=0; j<ry; j++)
    for(i=0; i<rx; i++, des++) {
      xx = lutx1[i] + luty1[j];
      yy = lutx2[i] + luty2[j];
      if( xx<0 || yy<0 || xx>x16 || yy>y16) *des = fill;
      else {
        xp = xx>>10; yp = yy>>10; dx = xx&0x3ff; dy = yy&0x3ff;
        pp = src+xp+yp*ix; 
        p00 = *pp;  
        if(xx < x16) {
          p10 = *(pp+1); 
          if(yy < y16) { p01 = *(pp+ix);  p11 = *(pp+ix+1);}
          else { p01 = *pp;  p11 = *(pp+1); }
        }
        else { // xx = x16
          p10 = *pp; 
          if(yy < y16) { p01 = *(pp+ix);  p11 = *(pp+ix); }
          else { p01 = *pp;   p11 = *pp; }
        }
        *des = (IBYTE)(((p11*dx+p01*(1024-dx))*dy + (p10*dx+p00*(1024-dx))*(1024-dy))>>20);
      }
    }
}


// near neighbor interpolation
void __lut2d_fast(IBYTE *src, int ix, int iy, IBYTE *des, int rx, int ry, long *lutxy, IBYTE fill)
{
  int i, j, x16, y16;
  long *lutx1, *lutx2, *luty1, *luty2;
  int xx, yy, xp, yp;
  
  lutx1 = lutxy;     lutx2 = lutxy+rx;  
  luty1 = lutx2+rx;  luty2 = luty1+ry;
  x16 = (ix-1)<<10;       y16 = (iy-1)<<10;
  for(j=0; j<ry; j++)
    for(i=0; i<rx; i++, des++) {
      xx = lutx1[i] + luty1[j];
      yy = lutx2[i] + luty2[j];
      if( xx<0 || yy<0 || xx>x16 || yy>y16) *des = fill;
      else {
        xp = (xx+512)>>10; yp = (yy+512)>>10;
        *des = *(src+xp+yp*ix); 
      }
    }
}


#define SFT	10

// bicubic interpolation
void __lut2d_fine(IBYTE *src, int ix, int iy, IBYTE *des, int rx, int ry, long *lutxy, IBYTE fill)
{
  int i, j, x16, y16, ixn, iyn, ixd, iyd;
  long *lutx1, *lutx2, *luty1, *luty2;
  int xx, yy, xp, yp, dx, dy, dx2, dy2, dx3, dy3;
  int m0, m1, m2, m3;
  IBYTE *pa, *pb, *pc, *pd;
  
  if(ix < 4 || iy < 4) {
    __lut2d(src, ix, iy, des, rx, ry, lutxy, fill);
    return;
  }
  lutx1 = lutxy;     lutx2 = lutxy+rx;  
  luty1 = lutx2+rx;  luty2 = luty1+ry;
  x16 = (ix-1)<<10;      y16 = (iy-1)<<10;
  ixn = ix-4;            iyn = iy-2;
  ixd = ix-1;            iyd = iy-1;
  for(j=0; j<ry; j++)
    for(i=0; i<rx; i++, des++) {
      xx = lutx1[i] + luty1[j];
      yy = lutx2[i] + luty2[j];
      if( xx<0 || yy<0 || xx>x16 || yy>y16) *des = fill;
      else {
        xp = xx>>10; yp = yy>>10; dx = xx&0x3ff; dy = yy&0x3ff;
        dx2 = (dx*dx)>>10;   dy2 = (dy*dy)>>10;
        dx3 = (dx2*dx)>>10;  dy3 = (dy2*dy)>>10;
		m0 =         - dx + (dx2+dx2) - dx3;
		m1 = (1<<10)      - (dx2+dx2) + dx3;
		m2 =           dx + dx2       - dx3;
		m3 =              - dx2       + dx3;
        
		if(xp > 0)
			pa = src+xp-1;
		else pa = src;
		pb = src+xp;
		pc = src+xp+1;
		if(xp < ixd)
			pd = src+xp+2;
		else
			pd = src+xp+1;
		
		if(yp > 0)
			pa += (yp-1)*ix;
		pb += yp*ix;
		pc += (yp+1)*ix;
		if(yp < iyd)
			pd += (yp+2)*ix;
		else
			pd += (yp+1)*ix;

        *des = (IBYTE)trunc_8( 
		   ((m0*pa[0] + m1*pa[1] + m2*pa[2] + m3*pa[3])*(        - dy + (dy2+dy2) - dy3) +
            (m0*pb[0] + m1*pb[1] + m2*pb[2] + m3*pb[3])*((1<<10)      - (dy2+dy2) + dy3) +
            (m0*pc[0] + m1*pc[1] + m2*pc[2] + m3*pc[3])*(          dy + dy2       - dy3) +
            (m0*pd[0] + m1*pd[1] + m2*pd[2] + m3*pd[3])*(             - dy2       + dy3)  ) >> 20 );
      }
    }
}



// bilinear interpolation
void __lut2d_keep_old(IBYTE *src, int ix, int iy, IBYTE *des, int rx, int ry, long *lutxy)
{
  int i, j, x16, y16;
  long *lutx1, *lutx2, *luty1, *luty2;
  int xx, yy, xp, yp, dx, dy;
  IBYTE *pp;
  long p00, p10, p01, p11;
  
  lutx1 = lutxy;     lutx2 = lutxy+rx;  
  luty1 = lutx2+rx;  luty2 = luty1+ry;
  x16 = (ix-1)<<10;       y16 = (iy-1)<<10;
  for(j=0; j<ry; j++)
    for(i=0; i<rx; i++, des++) {
      xx = lutx1[i] + luty1[j];
      yy = lutx2[i] + luty2[j];
      if( xx<0 || yy<0 || xx>x16 || yy>y16) continue;
      else {
        xp = xx>>10; yp = yy>>10; dx = xx&0x3ff; dy = yy&0x3ff;
        pp = src+xp+yp*ix; 
        p00 = *pp;  
        if(xx < x16) {
          p10 = *(pp+1); 
          if(yy < y16) { p01 = *(pp+ix);  p11 = *(pp+ix+1);}
          else { p01 = *pp;  p11 = *(pp+1); }
        }
        else { // xx = x16
          p10 = *pp; 
          if(yy < y16) { p01 = *(pp+ix);  p11 = *(pp+ix); }
          else { p01 = *pp;   p11 = *pp; }
        }
        *des = (IBYTE)(((p11*dx+p01*(1024-dx))*dy + (p10*dx+p00*(1024-dx))*(1024-dy))>>20);
      }
    }
}


// near neighbor interpolation
void __lut2d_fast_keep_old(IBYTE *src, int ix, int iy, IBYTE *des, int rx, int ry, long *lutxy)
{
  int i, j, x16, y16;
  long *lutx1, *lutx2, *luty1, *luty2;
  int xx, yy, xp, yp;
  
  lutx1 = lutxy;     lutx2 = lutxy+rx;  
  luty1 = lutx2+rx;  luty2 = luty1+ry;
  x16 = (ix-1)<<10;       y16 = (iy-1)<<10;
  for(j=0; j<ry; j++)
    for(i=0; i<rx; i++, des++) {
      xx = lutx1[i] + luty1[j];
      yy = lutx2[i] + luty2[j];
      if( xx<0 || yy<0 || xx>x16 || yy>y16) continue;
      else {
        xp = (xx+512)>>10; yp = (yy+512)>>10;
        *des = *(src+xp+yp*ix); 
      }
    }
}



// bicubic interpolation
void __lut2d_fine_keep_old(IBYTE *src, int ix, int iy, IBYTE *des, int rx, int ry, long *lutxy)
{
  int i, j, x16, y16, ixn, iyn, ixd, iyd;
  long *lutx1, *lutx2, *luty1, *luty2;
  int xx, yy, xp, yp, dx, dy, dx2, dy2, dx3, dy3;
  int m0, m1, m2, m3;
  IBYTE *pa, *pb, *pc, *pd;
  
  if(ix < 4 || iy < 4) {
    __lut2d_keep_old(src, ix, iy, des, rx, ry, lutxy);
    return;
  }
  lutx1 = lutxy;     lutx2 = lutxy+rx;  
  luty1 = lutx2+rx;  luty2 = luty1+ry;
  x16 = (ix-1)<<10;      y16 = (iy-1)<<10;
  ixn = ix-4;            iyn = iy-2;
  ixd = ix-1;            iyd = iy-1;
  for(j=0; j<ry; j++)
    for(i=0; i<rx; i++, des++) {
      xx = lutx1[i] + luty1[j];
      yy = lutx2[i] + luty2[j];
      if( xx<0 || yy<0 || xx>x16 || yy>y16) continue;
      else {
        xp = xx>>10; yp = yy>>10; dx = xx&0x3ff; dy = yy&0x3ff;
        dx2 = (dx*dx)>>10;   dy2 = (dy*dy)>>10;
        dx3 = (dx2*dx)>>10;  dy3 = (dy2*dy)>>10;
		m0 =         - dx + (dx2+dx2) - dx3;
		m1 = (1<<10)      - (dx2+dx2) + dx3;
		m2 =           dx + dx2       - dx3;
		m3 =              - dx2       + dx3;
        
		if(xp > 0)
			pa = src+xp-1;
		else pa = src;
		pb = src+xp;
		pc = src+xp+1;
		if(xp < ixd)
			pd = src+xp+2;
		else
			pd = src+xp+1;
		
		if(yp > 0)
			pa += (yp-1)*ix;
		pb += yp*ix;
		pc += (yp+1)*ix;
		if(yp < iyd)
			pd += (yp+2)*ix;
		else
			pd += (yp+1)*ix;

        *des = (IBYTE)trunc_8( 
		   ((m0*pa[0] + m1*pa[1] + m2*pa[2] + m3*pa[3])*(        - dy + (dy2+dy2) - dy3) +
            (m0*pb[0] + m1*pb[1] + m2*pb[2] + m3*pb[3])*((1<<10)      - (dy2+dy2) + dy3) +
            (m0*pc[0] + m1*pc[1] + m2*pc[2] + m3*pc[3])*(          dy + dy2       - dy3) +
            (m0*pd[0] + m1*pd[1] + m2*pd[2] + m3*pd[3])*(             - dy2       + dy3)  ) >> 20 );
      }
    }
}
