//	spatial smoother - reduce noise while preserving structure
//	Copyright (C) 2001 Ioura Batugowski (dividee)
//
//	This program is free software; you can redistribute it and/or modify
//	it under the terms of the GNU General Public License as published by
//	the Free Software Foundation; either version 2 of the License, or
//	(at your option) any later version.
//
//	This program is distributed in the hope that it will be useful,
//	but WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//	GNU General Public License for more details.
//
//	You should have received a copy of the GNU General Public License
//	along with this program; if not, write to the Free Software
//	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

#include <windows.h>
#include <commctrl.h>
#include <stdio.h>

#include "resource.h"
#include "filter.h"
#include "ScriptInterpreter.h"
#include "ScriptError.h"
#include "ScriptValue.h"

#define MAX_RADIUS 5

/// Prototypes ////////////////////////////////////////////////////////////////
// Forward declarations of the filter callbacks. All of them except
// ssmooth_configScriptProc are installed in filterDef_ssmooth below;
// ssmooth_configScriptProc is installed through ssmooth_func_defs.
int ssmooth_initProc(FilterActivation *fa, const FilterFunctions *ff);
int ssmooth_startProc(FilterActivation *fa, const FilterFunctions *ff);
int ssmooth_endProc(FilterActivation *fa, const FilterFunctions *ff);
long ssmooth_paramProc(FilterActivation *fa, const FilterFunctions *ff);
int ssmooth_configProc(FilterActivation *fa, const FilterFunctions *ff, HWND hwnd);
void ssmooth_stringProc(const FilterActivation *fa, const FilterFunctions *ff, char *buf);
int ssmooth_runProc(const FilterActivation *fa, const FilterFunctions *ff);
void ssmooth_configScriptProc(IScriptInterpreter *isi, void *fa_ptr, CScriptValue *argv, int argc);
bool ssmooth_FssProc(FilterActivation *fa, const FilterFunctions *ff, char *buf, int buflen);
///////////////////////////////////////////////////////////////////////////////

// Per-instance filter state. The callbacks obtain it by casting
// fa->filter_data; its size is published through
// filterDef_ssmooth (inst_data_size = sizeof(ssmoothFilterData)).
class ssmoothFilterData {
public:
	IFilterPreview *ifp;	// preview interface, copied from fa->ifp in ssmooth_configProc
	int *squaretab;			// table of (i-255)^2, allocated in startProc, freed in endProc
	int strength;			// right-shift applied to the squared colour distance (dialog/script range 0..18)
	int radius;				// window radius in pixels (1..MAX_RADIUS); diameter is radius*2+1
};

// Script functions exposed by this filter: a single "Config" entry bound to
// ssmooth_configScriptProc, terminated by a NULL entry.
// NOTE(review): "0ii" is presumably the host's signature string for
// "no return value, two int arguments" (diameter, strength) - confirm
// against the scripting headers.
ScriptFunctionDef ssmooth_func_defs[]={
    { (ScriptFunctionPtr)ssmooth_configScriptProc, "Config", "0ii" },
    { NULL },
};

// Script object handed to the host via filterDef_ssmooth.script_obj; only the
// function table (ssmooth_func_defs) is populated, the other two members are NULL.
CScriptObject ssmooth_obj={
    NULL, ssmooth_func_defs, NULL
};

// Filter descriptor registered with the host in VirtualdubFilterModuleInit2.
// This is a positional aggregate initializer: the order of the entries must
// match the member order of FilterDefinition (named in the trailing comments).
struct FilterDefinition filterDef_ssmooth = {

	NULL, NULL, NULL,								// next, prev, module
	"spatial smoother (1.0)",							// name
	"Yet another spatial noise reduction filter.",	// desc
	"Dividee",										// maker
	NULL,											// private_data
	sizeof(ssmoothFilterData),						// inst_data_size

	ssmooth_initProc,											// initProc
	NULL,											// deinitProc
	ssmooth_runProc,								// runProc
	ssmooth_paramProc,								// paramProc
	ssmooth_configProc, 							// configProc
	ssmooth_stringProc,								// stringProc
	ssmooth_startProc,								// startProc
	ssmooth_endProc,								// endProc

	&ssmooth_obj,									// script_obj
	ssmooth_FssProc,								// fssProc

};

///////////////////////////////////////////////////////////////////////////////
// DLL entry points exported with C linkage and the __cdecl calling convention.
extern "C" int __declspec(dllexport) __cdecl VirtualdubFilterModuleInit2(FilterModule *fm, const FilterFunctions *ff, int& vdfd_ver, int& vdfd_compat);
extern "C" void __declspec(dllexport) __cdecl VirtualdubFilterModuleDeinit(FilterModule *fm, const FilterFunctions *ff);

// Handle returned by ff->addFilter() at module init; passed back to
// ff->removeFilter() at module deinit.
static FilterDefinition *fd_ssmooth;

// Module entry point: registers filterDef_ssmooth with the host and reports
// the filter-definition versions this module was built against.
// Returns 0 on success, 1 if the host refused the registration.
int __declspec(dllexport) __cdecl VirtualdubFilterModuleInit2(FilterModule *fm, const FilterFunctions *ff, int& vdfd_ver, int& vdfd_compat)
{
	fd_ssmooth = ff->addFilter(fm, &filterDef_ssmooth, sizeof(FilterDefinition));
	if (fd_ssmooth == NULL) {
		return 1;
	}

	vdfd_ver = VIRTUALDUB_FILTERDEF_VERSION;
	vdfd_compat = VIRTUALDUB_FILTERDEF_COMPATIBLE;
	return 0;
}

// Module exit point: unregisters the filter that was added in
// VirtualdubFilterModuleInit2.
void __declspec(dllexport) __cdecl VirtualdubFilterModuleDeinit(FilterModule *fm, const FilterFunctions *ff)
{
	FilterDefinition *registered = fd_ssmooth;

	ff->removeFilter(registered);
}
///////////////////////////////////////////////////////////////////////////////

// Cached result of ff->isMMXEnabled(), refreshed in ssmooth_startProc and
// consulted in ssmooth_runProc to choose between the MMX and C code paths.
static bool g_MMXenabled;

// initProc: seeds a fresh filter instance with its default settings
// (strength 3, radius 2 i.e. a 5x5 window). Always returns 0.
int ssmooth_initProc(FilterActivation *fa, const FilterFunctions *ff)
{
	ssmoothFilterData *data = (ssmoothFilterData *)fa->filter_data;

	data->strength = 3;
	data->radius = 2;
	return 0;
}

// startProc: prepares the instance for rendering.
//
// Allocates and fills the squared-difference table used by the C code path in
// ssmooth_runProc: squaretab[d+255] == d*d for d in -255..255, i.e. exactly
// 255*2+1 entries. Also caches whether the host allows MMX code.
//
// Returns 0 on success, 1 if the table could not be allocated.
int ssmooth_startProc(FilterActivation *fa, const FilterFunctions *ff)
{
	ssmoothFilterData *fd = (ssmoothFilterData *)fa->filter_data;
	const int entries = 255*2+1;
	int i;

	// The NULL check only has an effect on toolchains whose operator new
	// returns NULL on failure instead of throwing.
	if (!(fd->squaretab = new int[entries]))
		return 1;

	g_MMXenabled = ff->isMMXEnabled();

	// Fill exactly the allocated range. The previous bound (i<=511) wrote one
	// element past the end of the 511-entry array.
	for(i=0; i<entries; ++i)
		fd->squaretab[i] = (i-255)*(i-255);

	return 0;
}

// endProc: releases the squared-difference table allocated in
// ssmooth_startProc and clears the pointer. Always returns 0.
int ssmooth_endProc(FilterActivation *fa, const FilterFunctions *ff)
{
	ssmoothFilterData *data = (ssmoothFilterData *)fa->filter_data;

	delete[] data->squaretab;
	data->squaretab = NULL;

	return 0;
}

// paramProc: sets the destination pitch to the row size (4 bytes per pixel)
// rounded up to a multiple of 8 bytes, and asks the host for separate
// source and destination buffers.
long ssmooth_paramProc(FilterActivation *fa, const FilterFunctions *ff)
{
	fa->dst.pitch = (4*fa->dst.w + 7) & ~7;

	return FILTERPARAM_SWAP_BUFFERS;
}

#pragma warning(disable:4700)

// Dialog procedure for the configuration dialog (IDD_FILTER_SSMOOTH).
//
// WM_INITDIALOG receives the ssmoothFilterData pointer in lParam and stores it
// in the dialog's DWL_USER slot; later messages read it back from there.
// The two trackbars edit fd->radius (1..MAX_RADIUS, displayed as the diameter
// radius*2+1) and fd->strength (0..18); every change redraws the preview.
// The dialog ends with result 0 on OK and 1 on Cancel.
//
// Returns TRUE when the message was handled, FALSE otherwise.
BOOL CALLBACK ssmooth_configDlgProc(HWND hdlg, UINT msg, WPARAM wParam, LPARAM lParam)
{
	ssmoothFilterData *fd = (ssmoothFilterData *)GetWindowLong(hdlg, DWL_USER);
	int pos;

	switch(msg) {
		case WM_INITDIALOG:
			SetWindowLong(hdlg, DWL_USER, lParam);
			fd = (ssmoothFilterData *)lParam;

			// SetDlgItemInt formats the number itself. The previous code
			// formatted into a leaked 2-byte heap buffer, which overflowed
			// for any two-digit value (diameter 11, strength 10..18).
			SendMessage(GetDlgItem(hdlg,IDC_SLIDER_RADIUS), TBM_SETRANGE, TRUE, MAKELONG(1,MAX_RADIUS));
			SendMessage(GetDlgItem(hdlg,IDC_SLIDER_RADIUS), TBM_SETPOS, TRUE, fd->radius);
			SetDlgItemInt(hdlg, IDC_RADIUS, fd->radius*2+1, TRUE);
			SendMessage(GetDlgItem(hdlg,IDC_SLIDER_STRENGTH), TBM_SETRANGE, TRUE, MAKELONG(0,18));
			SendMessage(GetDlgItem(hdlg,IDC_SLIDER_STRENGTH), TBM_SETPOS, TRUE, fd->strength);
			SetDlgItemInt(hdlg, IDC_STRENGTH, fd->strength, TRUE);
			fd->ifp->InitButton(GetDlgItem(hdlg,ID_PREVIEW));
			return TRUE;

		case WM_HSCROLL:
			// lParam is the handle of the trackbar that moved.
			if (lParam==(LPARAM)GetDlgItem(hdlg,IDC_SLIDER_RADIUS)) {
				pos = (int)SendMessage((HWND)lParam,TBM_GETPOS,0,0);
				if (fd->radius != pos) {
					SetDlgItemInt(hdlg, IDC_RADIUS, pos*2+1, TRUE);
					fd->radius = pos;
					fd->ifp->RedoFrame();
				}
			}
			if (lParam==(LPARAM)GetDlgItem(hdlg,IDC_SLIDER_STRENGTH)) {
				pos = (int)SendMessage((HWND)lParam,TBM_GETPOS,0,0);
				if (fd->strength != pos) {
					SetDlgItemInt(hdlg, IDC_STRENGTH, pos, TRUE);
					fd->strength = pos;
					fd->ifp->RedoFrame();
				}
			}
			return TRUE;

		case WM_COMMAND:
			switch(LOWORD(wParam)) {
			case IDOK:
				EndDialog(hdlg, 0);
				return TRUE;
			case IDCANCEL:
				EndDialog(hdlg, 1);
				return TRUE;
			case ID_PREVIEW:
				fd->ifp->Toggle(hdlg);
				return TRUE;
			case IDHELP:
				{
				// Open "<directory of the host executable>\plugins\ssmooth.txt"
				// in Notepad. Every step is length-checked; if the path cannot
				// be obtained the request is silently dropped.
				char prog[MAX_PATH];
				char path[MAX_PATH];
				char cmd[MAX_PATH + 64];	// room for "Notepad " + path + file name
				LPTSTR ptr = NULL;
				DWORD len;

				len = GetModuleFileName(NULL, prog, sizeof prog);
				if (len == 0 || len >= sizeof prog)
					return TRUE;			// failed or truncated

				len = GetFullPathName(prog, sizeof path, path, &ptr);
				if (len == 0 || len >= sizeof path || ptr == NULL)
					return TRUE;			// failed, truncated, or no file part

				*ptr = 0;					// cut after the last path separator

				_snprintf(cmd, sizeof cmd, "Notepad %splugins\\ssmooth.txt", path);
				cmd[sizeof cmd - 1] = 0;	// _snprintf does not terminate on truncation
				WinExec(cmd, SW_SHOW);
				return TRUE;
				}
			}
			break;
	}

	return FALSE;
}

#pragma warning(default:4700)

// configProc: shows the modal configuration dialog.
//
// The dialog edits the instance data in place, so the current radius and
// strength are saved first and put back if the dialog ends with a non-zero
// result (the dialog procedure ends with 1 on Cancel).
// Returns 0 when the settings were accepted, 1 otherwise.
int ssmooth_configProc(FilterActivation *fa, const FilterFunctions *ff, HWND hwnd)
{
	ssmoothFilterData *data = (ssmoothFilterData *)fa->filter_data;
	const int saved_radius = data->radius;
	const int saved_strength = data->strength;

	// The dialog procedure needs the preview interface.
	data->ifp = fa->ifp;

	if (!DialogBoxParam(fa->filter->module->hInstModule,
						MAKEINTRESOURCE(IDD_FILTER_SSMOOTH),
						hwnd,
						ssmooth_configDlgProc,
						(LPARAM)fa->filter_data)) {
		return 0;
	}

	data->radius = saved_radius;
	data->strength = saved_strength;
	return 1;
}

void ssmooth_stringProc(const FilterActivation *fa, const FilterFunctions *ff, char *buf)
{
	ssmoothFilterData *fd = (ssmoothFilterData *)fa->filter_data;
	sprintf(buf," (diam %d, strength %d)",fd->radius*2+1,fd->strength);
}

// Script handler for Config(diameter, strength).
//
// diameter must be odd and within 3..2*MAX_RADIUS+1; strength must be within
// 0..18. Anything else is reported through EXT_SCRIPT_ERROR(FCALL_OUT_OF_RANGE).
// The diameter is stored as a radius (diameter>>1).
void ssmooth_configScriptProc(IScriptInterpreter *isi, void *fa_ptr, CScriptValue *argv, int argc)
{
	FilterActivation *activation = (FilterActivation *)fa_ptr;
	ssmoothFilterData *data = (ssmoothFilterData *)activation->filter_data;

	const int diameter = argv[0].asInt();
	const int strength = argv[1].asInt();

	const bool diameter_ok = diameter>=3 && (diameter&1) && diameter<=2*MAX_RADIUS+1;
	const bool strength_ok = strength>=0 && strength<=18;

	if (!(diameter_ok && strength_ok))
		EXT_SCRIPT_ERROR(FCALL_OUT_OF_RANGE);

	data->radius = diameter>>1;
	data->strength = strength;
}


// fssProc: writes the script call that reproduces the current settings,
// "Config(<diameter>,<strength>)", into buf (capacity buflen bytes).
// Always returns true.
bool ssmooth_FssProc(FilterActivation *fa, const FilterFunctions *ff, char *buf, int buflen)
{
    ssmoothFilterData *fd = (ssmoothFilterData *)fa->filter_data;

	_snprintf(buf, buflen, "Config(%d,%d)", fd->radius*2+1, fd->strength);

	// _snprintf leaves the buffer unterminated when the output does not fit;
	// make sure the caller always gets a valid C string.
	if (buflen > 0)
		buf[buflen-1] = 0;

    return true;
}



// scaletab32[n] holds 2^32/n, so that the upper 32 bits of a 32x32-bit
// multiply by the entry divide by n. ssmooth_runProc_MMX indexes it with the
// accumulated weight count.
// Entries 1 and 2 are clamped to 0x7fffffff, and entry 0 is a placeholder.
// SCALE2 expands to 10 consecutive entries, SCALE3 to 100; the table covers
// indices 0..1999.
#define SCALE(i) (0x0000000100000000i64 / (i))
#define SCALE2(i)	SCALE((i)+0),SCALE((i)+1),SCALE((i)+2),SCALE((i)+3),SCALE((i)+4),\
					SCALE((i)+5),SCALE((i)+6),SCALE((i)+7),SCALE((i)+8),SCALE((i)+9)
#define SCALE3(i)	SCALE2((i)+00),SCALE2((i)+10),SCALE2((i)+20),SCALE2((i)+30),SCALE2((i)+40),\
					SCALE2((i)+50),SCALE2((i)+60),SCALE2((i)+70),SCALE2((i)+80),SCALE2((i)+90)

	static const int scaletab32[]={
		0,
		0x7fffffff, 	// special case for 1
		0x7fffffff, 	// special case for 2
		SCALE(3),
		SCALE(4),
		SCALE(5),
		SCALE(6),
		SCALE(7),
		SCALE(8),
		SCALE(9),
		SCALE2(10),
		SCALE2(20),
		SCALE2(30),
		SCALE2(40),
		SCALE2(50),
		SCALE2(60),
		SCALE2(70),
		SCALE2(80),
		SCALE2(90),
		SCALE3(100),
		SCALE3(200),
		SCALE3(300),
		SCALE3(400),
		SCALE3(500),
		SCALE3(600),
		SCALE3(700),
		SCALE3(800),
		SCALE3(900),
		SCALE3(1000),
		SCALE3(1100),
		SCALE3(1200),
		SCALE3(1300),
		SCALE3(1400),
		SCALE3(1500),
		SCALE3(1600),
		SCALE3(1700),
		SCALE3(1800),
		SCALE3(1900),
	};

#undef SCALE
#undef SCALE2
#undef SCALE3

// scaletab16[n] holds the 16-bit value 0x10000/n replicated into all four
// words of a 64-bit entry, for use with a packed 16-bit high multiply.
// ssmooth_runProc_radius1_MMX indexes it with the accumulated weight count.
// Entries 1 and 2 are clamped to 0x7fff per word, and entry 0 is a placeholder.
// SCALE2 expands to 10 consecutive entries; the table covers indices 0..149.
#define SCALE(i) (0x0001000100010001i64 * (0x10000 / (i)))
#define SCALE2(i)	SCALE((i)+0),SCALE((i)+1),SCALE((i)+2),SCALE((i)+3),SCALE((i)+4),\
					SCALE((i)+5),SCALE((i)+6),SCALE((i)+7),SCALE((i)+8),SCALE((i)+9)

	static const __int64 scaletab16[]={
		0,
		0x7fff7fff7fff7fffi64,		// special case for 1
		0x7fff7fff7fff7fffi64,		// special case for 2
		SCALE(3),
		SCALE(4),
		SCALE(5),
		SCALE(6),
		SCALE(7),
		SCALE(8),
		SCALE(9),
		SCALE2(10),
		SCALE2(20),
		SCALE2(30),
		SCALE2(40),
		SCALE2(50),
		SCALE2(60),
		SCALE2(70),
		SCALE2(80),
		SCALE2(90),
		SCALE2(100),
		SCALE2(110),
		SCALE2(120),
		SCALE2(130),
		SCALE2(140),
	};

#undef SCALE
#undef SCALE2

// General MMX implementation, for any radius.
//
// The function is declared naked, so there is no compiler-generated prologue:
// it pushes four registers itself and then reads its arguments relative to esp
// (offsets are listed on the line under the signature).
//
// Argument conventions, as passed by ssmooth_runProc:
//   w        - byte offset of the LAST pixel of a row, i.e. (width-1)<<2
//   strength - the filter strength plus 32; it is used as the count of a
//              64-bit right shift, so one shift both moves the high dword down
//              and divides the squared distance by 2^strength
//
// For every output pixel the window [x-radius, x+radius] x [y-radius, y+radius]
// is clamped to the image. Each neighbour is weighted by 16 minus its shifted
// squared colour distance to the centre pixel (saturating subtract), and the
// weighted R/G/B sums plus the weight total are accumulated in mm6/mm7. The
// sums are then multiplied by scaletab32[total] and the upper 32 bits of each
// product are packed into the output pixel.
//
// Trailing comments of the form "(R-Ri)+(G-Gi)" inside the loop denote sums of
// SQUARED differences (they annotate the result of pmaddwd of a register with
// itself).
//
// yymin/yymax/srcmax/xxmin are function-level statics used as scratch storage,
// so concurrent calls would share them.
// NOTE(review): the code uses cmovb/cmova/cmovs; confirm that the host's
// isMMXEnabled() implies a CPU that supports CMOV.
// NOTE(review): the saturating subtract works on 16-bit words, so only the low
// 16 bits of the shifted distance take part in the weight - confirm this is
// acceptable for small strength values.
static void __declspec(naked) ssmooth_runProc_MMX(Pixel32 *srcp, Pixel32 *dstp, int w, int h, int src_pitch, int dst_pitch, int radius, int strength)
// esp +													20				24		28		32		36				40				44		48
{
	static const __int64 sixteen = 0x0000001000000010i64;
	static unsigned yymin,yymax,srcmax,xxmin;

	__asm {
		push		ebp
		push		edi
		push		esi
		push		ebx

		mov 		esi,[esp+20]				; srcp

		mov 		eax,[esp+32]				; h
		dec 		eax
		mul 		dword ptr [esp+36]			; (h-1)*src_pitch
		add 		eax, esi
		mov 		srcmax, eax
		pxor		mm5,mm5 					; zero
		movd		mm4,[esp+48]				; strength+32

		// Per source row: esi = current row, yymin/yymax = first/last window row (clamped)
y_loop:
		mov 		eax,[esp+36]				; src_pitch
		mul 		dword ptr [esp+44]			; src_pitch*radius
		mov 		ebp,esi
		sub 		ebp,eax 					; square_src
		cmp 		ebp,[esp+20]				; too low ?
		cmovb		ebp,[esp+20]				; yymin
		mov 		yymin,ebp
		mov 		ebp,esi
		add 		ebp,eax
		cmp 		ebp,srcmax
		cmova		ebp,srcmax					; yymax
		mov 		yymax,ebp
		xor 		ebp,ebp 					; X
				
		// Per pixel: ebp = byte offset of the pixel in the row, xxmin/eax = clamped window columns
x_loop:
		mov 		eax,ebp
		mov 		ebx,[esp+44]				; radius
		xor 		ecx,ecx
		shl 		ebx,2
		sub 		eax,ebx 					; x-4*radius
		cmovs		eax,ecx
		mov 		xxmin,eax
		mov 		eax,ebp
		add 		eax,ebx 					; x+4*radius
		cmp 		eax,[esp+28]				
		cmova		eax,[esp+28]				; max = width-4

		mov 		ebx,yymax
		mov 		ecx,xxmin
		pxor		mm6,mm6 					; reset GBaccum
		pxor		mm7,mm7 					; reset accum2
		movd		mm0,[esi+ebp]				; 0000?RGB 
		punpcklbw	mm0,mm5 					; 0?0R0G0B CENTER PIXEL
		mov 		edi,yymin					; LINE POINTER
		
		// Window scan: edi = window row pointer, edx = byte offset of the neighbour in that row
yy_loop:
		mov 		edx,ecx 					; XX
xx_loop:
		movd		mm1,[edi+edx]
		punpcklbw	mm1,mm5 					; ? Ri Gi Bi
		movq		mm3,mm1
		psubw		mm1,mm0 					; ? R-Ri G-Gi B-Bi
		psllq		mm1,16						; R-Ri G-Gi B-Bi 0
		pmaddwd 	mm1,mm1 					; (R-Ri)+(G-Gi)  (B-Bi)
		punpckldq	mm2,mm1 					; (B-Bi)  ?
		paddd		mm1,mm2 					; (R-Ri)+(G-Gi)+(B-Bi)  ?
		movq		mm2,sixteen
		psrlq		mm1,mm4 					; sqerr(Q)
		punpckldq	mm1,mm1 					; sqerr(D) sqerr(D)
		psubusw 	mm2,mm1 					; 16-sqerr 16-sqerr
		movq		mm1,mm3
		punpcklwd	mm1,mm5 					; Gi Bi
		pmullw		mm1,mm2 					; Gi*(16-sqerr) Bi*(16-sqerr)
		paddd		mm6,mm1 					; Gaccum Baccum
		punpckhwd	mm3,mm5 					; ? Ri
		psllq		mm3,32						; Ri 0
		pmullw		mm3,mm2 					; Ri*(16-sqerr) 0
		paddd		mm7,mm3
		psrlq		mm2,32
		paddd		mm7,mm2 					; Raccum count

		add 		edx,4
		cmp 		edx,eax
		jna 		xx_loop

		add 		edi,[esp+36]				; +src_pitch
		cmp 		edi,ebx
		jna 		yy_loop

		// Normalise: multiply each sum by scaletab32[count]; mul leaves the quotient in edx
		movd		ebx,mm7 					; count
		psrlq		mm7,32
		mov 		ebx, [scaletab32+ebx*4]
		movd		eax,mm7 					; Raccum
		mul 		ebx
		mov 		ecx,edx 					; 000R
		movd		eax,mm6 					; Baccum
		mul 		ebx
		shl 		ecx,16
		or			ecx,edx 					; 0R0B
		psrlq		mm6,32
		movd		eax,mm6
		mul 		ebx
		mov 		edi,[esp+24]
		shl 		edx,8
		or			ecx,edx 					; 0RGB

		mov 		[edi+ebp],ecx				; write pixel

		add 		ebp,4
		cmp 		ebp,[esp+28]				; width-4
		jna 		x_loop

		// Next row: the dstp argument slot on the stack is advanced in place
		mov 		eax,[esp+40]				; dst_pitch
		add 		[esp+24],eax

		add 		esi,[esp+36]				; +src_pitch
		cmp 		esi,srcmax
		jna 		y_loop

		pop 		ebx
		pop 		esi
		pop 		edi
		pop 		ebp
		emms
		ret
	};
}


// ACCUMULATE(ADR): inline-asm fragment used by ssmooth_runProc_radius1_MMX to
// add the neighbour pixel at [ADR] to the running weighted sum.
//
// Register contract (set up by the caller):
//   mm0 - centre pixel unpacked to words     mm4 - shift count (strength+32)
//   mm5 - zero                               mm7 - the constant 16 in all four words
//   mm6 - accumulator, words from high to low: Raccum Gaccum Baccum count
// Clobbers mm1, mm2 and mm3.
//
// The weight is 16 minus the shifted squared colour distance (saturating
// subtract), replicated to all four words; the pixel words are multiplied by
// it and added to mm6, and the weight itself is added to the low (count) word.
// Trailing comments of the form "(R-Ri)+(G-Gi)" denote sums of SQUARED
// differences (result of pmaddwd of a register with itself).
#define ACCUMULATE(ADR) \
__asm	movd		mm1,[ADR]\
__asm	punpcklbw	mm1,mm5						/* ? Ri Gi Bi */\
__asm	movq		mm3,mm1 \
__asm	psubw		mm1,mm0						/* ? R-Ri G-Gi B-Bi */\
__asm	psllq		mm1,16						/* R-Ri G-Gi B-Bi 0 */\
__asm	pmaddwd		mm1,mm1						/* (R-Ri)+(G-Gi)  (B-Bi) */\
__asm	punpckldq	mm2,mm1						/* (B-Bi)  ? */\
__asm	paddd		mm1,mm2						/* (R-Ri)+(G-Gi)+(B-Bi)  ? */\
__asm	movq		mm2,mm7						/* sixteen */\
__asm	psrlq		mm1,mm4						/* sqerr(Q) */\
__asm	punpcklwd	mm1,mm1 \
__asm	punpckldq	mm1,mm1						/* sqerr(W) 4 times */\
__asm	psubusw		mm2,mm1						/* 16-sqerr */\
__asm	pmullw		mm3,mm2						/* ? Ri*(16-sqerr) Gi*(16-sqerr) Bi*(16-sqerr) */\
__asm	psrlq		mm2,48 \
__asm	paddw		mm6,mm2 \
__asm	psllq		mm3,16 \
__asm	paddw		mm6,mm3						/* Raccum Gaccum Baccum count */
		

// Specialised MMX implementation for radius 1 (fixed 3x3 window).
//
// The function is declared naked: it pushes four registers itself and reads
// its arguments relative to esp (offsets listed under the signature).
//
// Argument conventions, as passed by ssmooth_runProc:
//   w        - row width in BYTES (width<<2)
//   strength - the filter strength plus 32 (count of a 64-bit right shift)
//
// Behaviour: the first and last rows and the first and last pixel of every
// other row are copied unfiltered; interior pixels are the weighted average of
// their 3x3 neighbourhood, accumulated in 16-bit words (see ACCUMULATE) and
// normalised with scaletab16[count].
//
// Preconditions: the frame must be at least 3 pixels wide and 3 rows high,
// because the row and pixel loops are do-while loops that always run once.
// The row copies move 8 bytes at a time, so they may touch up to 4 bytes past
// an odd-width row.
//
// Inside the row loop esi points at the row ABOVE the one being produced
// (the top-left corner of the 3x3 window); the current row is at esi+edx.
static void __declspec(naked) ssmooth_runProc_radius1_MMX(Pixel32 *srcp, Pixel32 *dstp, int w, int h, int src_pitch, int dst_pitch, int strength)
// esp +															20				24		28		32		36				40				44
{
	static const __int64 sixteen = 0x0010001000100010i64;

	__asm {
		push		ebp
		push		edi
		push		esi
		push		ebx

		mov			esi,[esp+20]				; srcp
		mov			edi,[esp+24]				; dstp
		mov			edx,[esp+36]				; src_pitch
		mov			ebx,[esp+28]				; w

		xor			ecx,ecx
first_line:										; copy the top row unfiltered
		movq		mm0,[esi+ecx]
		movq		[edi+ecx],mm0
		add			ecx,8
		cmp			ecx,ebx
		jnae		first_line

		add			edi,[esp+40]
		movq		mm7,sixteen
		add			edi,4						; edi -> row 1, pixel 1
		pxor		mm5,mm5						; zero
		sub			ebx,8						; w-2 pixels (borders)
		movd		mm4,[esp+44]				; strength+32

		mov			ebp,[esp+32]				; h
		sub			ebp,2						; number of interior rows
y_loop:
		; Left border: copy the first pixel of the CURRENT row (esi+edx).
		; This previously read [esi], i.e. the pixel from the row above.
		mov			eax,[esi+edx]				; first pixel on the line
		mov			ecx,ebx
		mov			[edi-4],eax
		shr			ecx,2						; interior pixel count
x_loop:
		pxor		mm6,mm6						; reset accumulator

		movd		mm0,[esi+edx+4]				; 0000?RGB 
		punpcklbw	mm0,mm5						; 0?0R0G0B CENTER PIXEL
		movq		mm1,mm0
		pmullw		mm1,mm7						; ? R*16 G*16 B*16
		movq		mm2,mm7						; sixteen
		psrlq		mm2,48
		paddw		mm6,mm2
		psllq		mm1,16
		paddw		mm6,mm1						; Raccum Gaccum Baccum count

		ACCUMULATE(esi)
		ACCUMULATE(esi+4)
		ACCUMULATE(esi+8)
		ACCUMULATE(esi+edx)
		ACCUMULATE(esi+edx+8)
		ACCUMULATE(esi+edx*2)
		ACCUMULATE(esi+edx*2+4)
		ACCUMULATE(esi+edx*2+8)
		
		movd		eax,mm6
		and			eax,0xffff					; count
//		pmulhuw		mm6,[scaletab16+eax*8]		; scale pixel
		; tricky code to compensate the loss of pmulhuw (SSE):
		; pmulhw is signed, so add the multiplier back to every word whose
		; accumulator has its sign bit set
			movq		mm1,[scaletab16+eax*8]
			pmulhw		mm6,mm1						; scale pixel
			movq		mm2,mm6
			psraw		mm6,16						; negative results ? (overflow)
			movq		mm0,mm2
			paddw		mm0,mm1
			pand		mm0,mm6
			pandn		mm6,mm2
			paddw		mm6,mm0


		psrlq		mm6,16						; drop the count word
		packuswb	mm6,mm6

		movd		[edi],mm6
		add			edi,4
		add			esi,4
		dec			ecx
		jnz			x_loop

		; Right border: copy the last pixel of the CURRENT row (esi+edx+4).
		; This previously read [esi+4], i.e. the pixel from the row above.
		mov			eax,[esi+edx+4]				; last pixel on the line
		mov			[edi],eax
		add			esi,edx
		add			edi,[esp+40]
		sub			esi,ebx						; back to the start of the next row
		sub			edi,ebx
		dec			ebp
		jnz			y_loop

		add			esi,edx						; esi -> last source row
		xor			ecx,ecx
		sub			edi,4						; edi -> start of last destination row

last_line:										; copy the bottom row unfiltered
		movq		mm0,[esi+ecx]
		movq		[edi+ecx],mm0
		add			ecx,8
		cmp			ecx,ebx
		jnae		last_line
		add			ebx,8
last_pixels:									; 1 or 2 remaining pixels
		mov			eax,[esi+ecx]
		mov			[edi+ecx],eax
		add			ecx,4
		cmp			ecx,ebx
		jne			last_pixels

		pop			ebx
		pop			esi
		pop			edi
		pop			ebp
		emms
		ret
	};
}


// runProc: filters one frame from fa->src into fa->dst.
//
// Each output pixel is a weighted average of the pixels in the
// (2*radius+1)^2 window around it, clamped to the frame. A neighbour's weight
// is 16 - min(16, squared RGB distance to the centre pixel >> strength), so
// similar pixels contribute fully and dissimilar ones not at all. The centre
// pixel always has weight 16, so the weight total is never zero.
//
// When the host allows MMX, one of the two assembly routines is used:
//   - the 3x3 routine for radius 1, which copies the frame border unfiltered
//     and needs a frame of at least 3x3 pixels;
//   - the general routine otherwise.
// Without MMX the portable C loop below is used.
//
// Always returns 0.
int ssmooth_runProc(const FilterActivation *fa, const FilterFunctions *ff)
{
	ssmoothFilterData *fd = (ssmoothFilterData *)fa->filter_data;
	Pixel32 *src = (Pixel32 *)fa->src.data;
	Pixel32 *dst = (Pixel32 *)fa->dst.data;
	int ppitch=fa->src.pitch>>2;			// source pitch in pixels
	int strength=fd->strength;
	int radius=fd->radius;
	int *squaretab=fd->squaretab;

	if (g_MMXenabled) {
		// The radius-1 routine runs its row and pixel loops at least once
		// with counters h-2 and w-2. On frames smaller than 3x3 those
		// counters would wrap and the routine would run off the buffers, so
		// such frames fall back to the general routine, which clamps its
		// window to the frame.
		if (radius==1 && fa->src.w>=3 && fa->src.h>=3)
			ssmooth_runProc_radius1_MMX(src,dst,fa->src.w<<2,fa->src.h,fa->src.pitch,fa->dst.pitch,strength+32);
		else
			ssmooth_runProc_MMX(src,dst,(fa->src.w-1)<<2,fa->src.h,fa->src.pitch,fa->dst.pitch,radius,strength+32);

		return 0;
	}

	for (int y=0; y<fa->src.h; y++) {
		// Window rows, clamped to the frame and expressed as pixel offsets.
		int yymin = max(y-radius,0)*ppitch;
		int yymax = min(y+radius,fa->src.h-1)*ppitch;
		int yofs = y * ppitch;

		for (int x=0; x<fa->src.w; x++) {
			int xxmin = max(x-radius,0);
			int xxmax = min(x+radius,fa->src.w-1);
			Pixel32 center = src[yofs+x];
			// Pre-offset the table per channel so that c?tab[v] is
			// (v - centre channel value)^2.
			int *cbtab = squaretab + 255 - (center & 0xff);
			int *cgtab = squaretab + 255 - ((center>>8) & 0xff);
			int *crtab = squaretab + 255 - ((center>>16) & 0xff);
			unsigned int raccum = 0, gaccum = 0, baccum = 0, count = 0;

			for (int yyofs=yymin; yyofs<=yymax; yyofs+=ppitch) {
				for (int xx=xxmin; xx<=xxmax; xx++) {
					Pixel32 c = src[yyofs+xx];
					int b = c & 0xff;
					int g = (c>>8) & 0xff;
					int r = (c>>16) & 0xff;
					int sqerr = (cbtab[b] + cgtab[g] + crtab[r]) >> strength;

					// Turn the distance into a weight in 0..16.
					if (sqerr>16) sqerr=16;
					sqerr = 16 - sqerr;

					baccum += b * sqerr;
					gaccum += g * sqerr;
					raccum += r * sqerr;
					count += sqerr;
				}
			}
			baccum = baccum / count;
			gaccum = gaccum / count;
			raccum = raccum / count;
			dst[x] = baccum + (gaccum<<8) + (raccum<<16);
		}
		dst += fa->dst.pitch >> 2;
	}

	return 0;
}
