// ----------------------------------------------------------------------------
// Copyright 2000, Paul Nettle. All rights reserved.
//
// Fast floating point substitution routines
//
// This file has been entered into the public domain by the author.
// ----------------------------------------------------------------------------
//
//      COMPILE WITH WATCOM
//
// ----------------------------------------------------------------------------
//
//	Routine		Equivalent C code
//      -----------------------------------------
//	ICHOP		(int) value
//	FCHOP		(float) ((int) value)
//	IFLOOR		(int) floor(value)
//	FFLOOR		floor(value)
//	ICEIL		(int) ceil(value)
//	FCEIL		ceil(value)
//	FFRAC		value - floor(value)
//	SUB_PIX		ceil(value) - value
//	FTOX24		(int) (value * 0x1000000)
//	FTOX16		(int) (value * 0x10000)
//	FTOX8		(int) (value * 0x100)
//	IS_NEG		(bool) (value < 0.0f)
//	IS_LESS		(bool) (value1 < value2)
//	IS_LEQUAL	(bool) (value1 <= value2)
//	IS_GREATER	(bool) (value1 > value2)
//	IS_GEQUAL	(bool) (value1 >= value2)
//
// ----------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
  // ----------------------------------------------------------------------------
// Absolute value of `a`. The argument is evaluated twice, so do not pass an
// expression with side effects.
#define	ABS(a)	((a) < 0 ? -(a) : (a))
  // ----------------------------------------------------------------------------
// Shift tables for the integer-only conversion routines, filled by initFPU().
// Each table holds two bytes per biased float exponent e: byte [e*2+1] is a
// right-shift count and byte [e*2+0] a left-shift count. initFPU() writes
// only one of the two per exponent; the other keeps its static zero value.
static	unsigned char	chptab[512];	// shifts for plain float -> int
static	unsigned char	f24tab[512];	// shifts for value * 0x1000000 (FTOX242)
static	unsigned char	f16tab[512];	// shifts for value * 0x10000 (FTOX162)
static	unsigned char	fx8tab[512];	// shifts for value * 0x100 (FTOX82)
// Per-exponent mask of the low mantissa bits that lie below the binary point;
// IFLOOR3 and ICEIL2 test it to see whether a fractional part exists.
static	unsigned int	msktab[256];
// x87 control words built by FINIT(): the word found at start-up, plus copies
// with the rounding-control bits (mask 0x0C00) replaced.
static	unsigned short	_savedControl;
static	unsigned short	_fchopControl;	// rounding bits = 0x0C00
static	unsigned short	_ffloorControl;	// rounding bits = 0x0400
static	unsigned short	_fceilControl;	// rounding bits = 0x0800
  // ----------------------------------------------------------------------------
// timerLo/timerHi receive EAX/EDX in startTimer(); timerAdjust is subtracted
// from every measurement taken by the test routines; writeValue is the scratch
// dword that the inline-assembly routines store to and reload from.
static	unsigned int	timerLo, timerHi, timerAdjust, writeValue;
// Magic addends, read as 64-bit doubles (low dword first) by "fadd qword ptr".
// Decoded as IEEE-754 doubles they are:
//   _t_conv_int  = 1.5 * 2^52	(used by IFLOOR2)
//   _t_conv24_08 = 1.5 * 2^44	(used by FTOX8)
//   _t_conv16_16 = 1.5 * 2^36	(used by FTOX16)
//   _t_conv08_24 = 1.5 * 2^28	(used by FTOX24)
// After the add, the routines read the result's low dword from _t_fxfl[0].
  static	unsigned int	_t_conv_int[2]  = {0,0x43380000};
static	unsigned int	_t_conv24_08[2] = {0,0x42B80000};
static	unsigned int	_t_conv16_16[2] = {0,0x42380000};
static	unsigned int	_t_conv08_24[2] = {0,0x41B80000};
// Destination for the 64-bit "fstp" in IFLOOR2 and the FTOXnn routines
static	unsigned int	_t_fxfl[2];
  // ----------------------------------------------------------------------------
// Begins a timing measurement: emits the opcode bytes 0x0F 0x31 (RDTSC) and
// stores the resulting EAX in timerLo and EDX in timerHi.
void	startTimer();
#pragma aux startTimer =		\
	"db	0x0F, 0x31"		\
	"mov	timerLo,eax"		\
	"mov	timerHi,edx"		\
	modify exact [edx eax];
  // ----------------------------------------------------------------------------
// Ends a timing measurement: re-reads the counter (opcode bytes 0x0F 0x31)
// and returns the new EAX minus timerLo. Only the low 32 bits take part;
// timerHi is not consulted.
int	stopTimer();
#pragma aux stopTimer =			\
	"db	0x0F, 0x31"		\
	"sub	eax,timerLo"		\
	value [eax]			\
	modify exact [edx eax];
  // ----------------------------------------------------------------------------
// Captures the current x87 control word in _savedControl and derives three
// variants from it, each with the rounding-control bits (mask 0x0C00) cleared
// and then set to a fixed value:
//   _fchopControl  <- 0x0C00
//   _ffloorControl <- 0x0400
//   _fceilControl  <- 0x0800
// No control word is loaded here; each routine issues its own "fldcw".
// NOTE(review): the assembly writes AX and four globals, yet the pragma says
// "modify nomemory exact []" and names no value register for the short
// return type - confirm this is what Watcom expects.
short	FINIT();
#pragma aux FINIT =			\
	"fstcw	_savedControl"		\
	"mov	ax, _savedControl"	\
	"and	ax, 0xf3ff"		\
	"or	ax, 0x0C00"		\
	"mov	_fchopControl, ax"	\
	"and	ax, 0xf3ff"		\
	"or	ax, 0x0400"		\
	"mov	_ffloorControl, ax"	\
	"and	ax, 0xf3ff"		\
	"or	ax, 0x0800"		\
	"mov	_fceilControl, ax"	\
	parm nomemory caller []		\
	modify nomemory exact [];
  // ----------------------------------------------------------------------------
// FPU replacement for `(int) value`: loads _fchopControl, stores the argument
// (passed on the FPU stack) as a 32-bit integer into writeValue with "fistp",
// and returns that dword in EAX. The control word is left at _fchopControl
// afterwards. Requires FINIT() to have run.
// This routine executes in approximately 16 cycles
int	ICHOP( float Val );
#pragma aux ICHOP =			\
	"fldcw	_fchopControl"		\
	"fistp	dword ptr writeValue"	\
	"mov	eax,writeValue"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Integer-only replacement for `(int) value`, working on the raw bits of the
// float passed in EAX:
//   EAX - low 23 bits (mantissa) with bit 23, the implicit leading one, set
//   EBX - the 8-bit biased exponent, used as the index into chptab
//   EDX - the sign replicated across the register (0 or 0xFFFFFFFF)
// The mantissa is shifted right by chptab[exp*2+1], then left by
// chptab[exp*2+0]; the closing xor/sub pair negates it when EDX is all ones.
// Requires initFPU() to have filled chptab.
// This routine executes in approximately 21 cycles
int	ICHOP2( float Val );
#pragma aux ICHOP2 =			\
	"mov	ebx,eax"		\
	"and	eax,0x7fffff"		\
	"mov	edx,ebx"		\
	"and	ebx,0x7f800000"		\
	"shr	ebx,23" 		\
	"or	eax,0x800000"		\
	"sar	edx,31"	 		\
	"mov	cl,chptab[ebx*2+1]"	\
	"sar	eax,cl"			\
	"mov	cl,chptab[ebx*2+0]"	\
	"sal	eax,cl"			\
	"xor	eax,edx" 		\
	"sub	eax,edx"		\
	value [eax]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax ebx ecx edx];
  // ----------------------------------------------------------------------------
// FPU replacement for `(float) ((int) value)`: loads _fchopControl, stores
// the argument as a 32-bit integer into writeValue, then reloads that integer
// onto the FPU stack as the return value. The result therefore round-trips
// through a 32-bit integer. The control word stays at _fchopControl.
// This routine executes in approximately 19 cycles
float	FCHOP( float Val );
#pragma aux FCHOP =			\
	"fldcw	_fchopControl"		\
	"fistp	dword ptr writeValue"	\
	"fild	writeValue"		\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// Alternative to FCHOP that never leaves the FPU: loads _fchopControl and
// rounds the top of the FPU stack to an integral value with "frndint".
// The control word stays at _fchopControl.
// This routine executes in approximately 30 cycles
float	FCHOP2( float Val );
#pragma aux FCHOP2 =			\
	"fldcw	_fchopControl"		\
	"frndint"			\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// FPU replacement for `(int) floor(value)`: loads _ffloorControl, stores the
// argument as a 32-bit integer into writeValue with "fistp", and returns that
// dword in EAX. The control word stays at _ffloorControl afterwards.
// This routine executes in approximately 16 cycles
int	IFLOOR( float Val );
#pragma aux IFLOOR =			\
	"fldcw	_ffloorControl"		\
	"fistp	dword ptr writeValue"	\
	"mov	eax,writeValue"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Variant of IFLOOR that uses the magic-addend trick instead of "fistp":
// loads _ffloorControl, adds the double stored in _t_conv_int, writes the sum
// as a 64-bit double to _t_fxfl and returns its low dword (_t_fxfl[0]).
// The control word stays at _ffloorControl afterwards.
// This routine executes in approximately 16 cycles
int	IFLOOR2( float Val );
#pragma aux IFLOOR2 =			\
	"fldcw	_ffloorControl"		\
	"fadd	qword ptr _t_conv_int"	\
	"fstp	qword ptr _t_fxfl"	\
	"mov	eax,_t_fxfl"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Integer-only floor on the raw bits of the float passed in EAX. It first
// truncates exactly as ICHOP2 does (mantissa in EAX, exponent in EBX, sign
// mask in EDX, shifts taken from chptab), keeping an unshifted copy of the
// mantissa in ESI. Then, only when the input is negative (EDX non-zero) and
// ESI has bits set under msktab[exp] (a fractional part exists), the result
// is decremented by one.
// The accompanying test expects a mismatch with floor() for -0.0f.
// Requires initFPU() to have filled chptab and msktab.
// This routine executes in approximately 25 cycles
int	IFLOOR3( float Val );
#pragma aux IFLOOR3 =			\
	"mov	ebx,eax"		\
	"and	eax,0x7fffff"		\
	"mov	edx,ebx"		\
	"and	ebx,0x7f800000"		\
	"shr	ebx,23" 		\
	"or	eax,0x800000"		\
	"sar	edx,31"	 		\
	"mov	esi,eax"		\
	"mov	cl,chptab[ebx*2+1]"	\
	"sar	eax,cl"			\
	"mov	cl,chptab[ebx*2+0]"	\
	"sal	eax,cl"			\
	"xor	eax,edx" 		\
	"sub	eax,edx"		\
	"or	edx,edx"		\
	"jz	l1"			\
	"and	esi,msktab[ebx * 4]"	\
	"jz	l1"			\
	"dec	eax"			\
	"l1:"				\
	value [eax]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax ebx ecx edx esi];
  // ----------------------------------------------------------------------------
// FPU replacement for `floor(value)`: loads _ffloorControl, stores the
// argument as a 32-bit integer into writeValue, then reloads that integer
// onto the FPU stack as the return value. The result therefore round-trips
// through a 32-bit integer. The control word stays at _ffloorControl.
// This routine executes in approximately 19 cycles
float	FFLOOR( float Val );
#pragma aux FFLOOR =			\
	"fldcw	_ffloorControl"		\
	"fistp	dword ptr writeValue"	\
	"fild	writeValue"		\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// Alternative to FFLOOR that never leaves the FPU: loads _ffloorControl and
// rounds the top of the FPU stack to an integral value with "frndint".
// The control word stays at _ffloorControl.
// This routine executes in approximately 30 cycles
float	FFLOOR2( float Val );
#pragma aux FFLOOR2 =			\
	"fldcw	_ffloorControl"		\
	"frndint"			\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// FPU replacement for `(int) ceil(value)`: loads _fceilControl, stores the
// argument as a 32-bit integer into writeValue with "fistp", and returns that
// dword in EAX. The control word stays at _fceilControl afterwards.
// This routine executes in approximately 16 cycles
int	ICEIL( float Val );
#pragma aux ICEIL =			\
	"fldcw	_fceilControl"		\
	"fistp	dword ptr writeValue"	\
	"mov	eax,writeValue"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Integer-only ceiling on the raw bits of the float passed in EAX. An
// all-zero bit pattern (+0.0f) short-circuits and returns 0. Otherwise the
// value is truncated exactly as ICHOP2 does (mantissa in EAX, exponent in
// EBX, sign mask in EDX, shifts taken from chptab), keeping an unshifted copy
// of the mantissa in ESI. Then, only when the input is non-negative (EDX is
// zero) and ESI has bits set under msktab[exp] (a fractional part exists),
// the result is incremented by one.
// Requires initFPU() to have filled chptab and msktab.
// This routine executes in approximately 26 cycles
int	ICEIL2( float Val );
#pragma aux ICEIL2 =			\
	"or	eax,eax"		\
	"jz	l1"			\
	"mov	ebx,eax"		\
	"and	eax,0x7fffff"		\
	"mov	edx,ebx"		\
	"and	ebx,0x7f800000"		\
	"shr	ebx,23" 		\
	"sar	edx,31"	 		\
	"or	eax,0x800000"		\
	"mov	esi,eax"		\
	"mov	cl,chptab[ebx*2+1]"	\
	"sar	eax,cl"			\
	"mov	cl,chptab[ebx*2+0]"	\
	"sal	eax,cl"			\
	"xor	eax,edx" 		\
	"sub	eax,edx"		\
	"or	edx,edx"		\
	"jnz	l1"			\
	"and	esi,msktab[ebx * 4]"	\
	"jz	l1"			\
	"inc	eax"			\
	"l1:"				\
	value [eax]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax ebx ecx edx esi];
  // ----------------------------------------------------------------------------
// FPU replacement for `ceil(value)`: loads _fceilControl, stores the argument
// as a 32-bit integer into writeValue, then reloads that integer onto the FPU
// stack as the return value. The result therefore round-trips through a
// 32-bit integer. The control word stays at _fceilControl.
// This routine executes in approximately 19 cycles
float	FCEIL( float Val );
#pragma aux FCEIL =			\
	"fldcw	_fceilControl"		\
	"fistp	dword ptr writeValue"	\
	"fild	writeValue"		\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// Alternative to FCEIL that never leaves the FPU: loads _fceilControl and
// rounds the top of the FPU stack to an integral value with "frndint".
// The control word stays at _fceilControl.
// This routine executes in approximately 31 cycles
float	FCEIL2( float Val );
#pragma aux FCEIL2 =			\
	"fldcw	_fceilControl"		\
	"frndint"			\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// FPU replacement for `value - floor(value)`: loads _ffloorControl, stores
// the integer form of the argument into writeValue with "fist" (no pop, so
// the argument stays on the FPU stack), then subtracts that integer from it
// with "fisub". The control word stays at _ffloorControl.
// This routine executes in approximately 22 cycles
float	FFRAC( float Val );
#pragma aux FFRAC =			\
	"fldcw	_ffloorControl"		\
	"fist	dword ptr writeValue"	\
	"fisub	dword ptr writeValue"	\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// Alternative to FFRAC that never leaves the FPU: loads _ffloorControl,
// duplicates the argument, rounds the copy with "frndint" and subtracts it
// from the original with "fsubp". The control word stays at _ffloorControl.
// This routine executes in approximately 34 cycles
float	FFRAC2( float Val );
#pragma aux FFRAC2 =			\
	"fldcw	_ffloorControl"		\
	"fld	st(0)"			\
	"frndint"			\
	"fsubp	st(1), st"		\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// FPU replacement for `ceil(value) - value`: loads _fceilControl, stores the
// integer form of the argument into writeValue with "fist" (no pop), then
// uses the reversed subtract "fisubr" to compute that integer minus the
// argument. The control word stays at _fceilControl.
// This routine executes in approximately 22 cycles
float	SUB_PIX( float Val );
#pragma aux SUB_PIX =			\
	"fldcw	_fceilControl"		\
	"fist	dword ptr writeValue"	\
	"fisubr	dword ptr writeValue"	\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// Alternative to SUB_PIX that never leaves the FPU: loads _fceilControl,
// duplicates the argument, rounds the copy with "frndint" and applies the
// reversed subtract-and-pop "fsubrp" to the pair. The control word stays at
// _fceilControl.
// This routine executes in approximately 34 cycles
float	SUB_PIX2( float Val );
#pragma aux SUB_PIX2 =			\
	"fldcw	_fceilControl"		\
	"fld	st(0)"			\
	"frndint"			\
	"fsubrp	st(1), st"		\
	value [8087]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [8087];
  // ----------------------------------------------------------------------------
// Float to fixed point, documented as `(int) (value * 0x1000000)`: adds the
// double stored in _t_conv08_24, writes the sum as a 64-bit double to _t_fxfl
// and returns its low dword (_t_fxfl[0]). No control word is loaded here, so
// rounding follows whichever mode the FPU is currently in.
// This routine executes in approximately 10 cycles
int	FTOX24( float Val );
#pragma aux FTOX24 =			\
	"fadd	qword ptr _t_conv08_24"	\
	"fstp	qword ptr _t_fxfl"	\
	"mov	eax,_t_fxfl"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Integer-only variant of FTOX24. Same bit manipulation as ICHOP2 (mantissa
// with implicit one in EAX, biased exponent in EBX, sign mask in EDX), but
// the shift counts come from f24tab instead of chptab.
// Requires initFPU() to have filled f24tab.
// This routine executes in approximately 21 cycles
int	FTOX242( float Val );
#pragma aux FTOX242 =			\
	"mov	ebx,eax"		\
	"and	eax,0x7fffff"		\
	"mov	edx,ebx"		\
	"and	ebx,0x7f800000"		\
	"shr	ebx,23" 		\
	"or	eax,0x800000"		\
	"sar	edx,31"	 		\
	"mov	cl,f24tab[ebx*2+1]"	\
	"sar	eax,cl"			\
	"mov	cl,f24tab[ebx*2+0]"	\
	"sal	eax,cl"			\
	"xor	eax,edx" 		\
	"sub	eax,edx"		\
	value [eax]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax ebx ecx edx];
  // ----------------------------------------------------------------------------
// Float to fixed point, documented as `(int) (value * 0x10000)`: adds the
// double stored in _t_conv16_16, writes the sum as a 64-bit double to _t_fxfl
// and returns its low dword (_t_fxfl[0]). No control word is loaded here, so
// rounding follows whichever mode the FPU is currently in.
// This routine executes in approximately 10 cycles
int	FTOX16( float Val );
#pragma aux FTOX16 =			\
	"fadd	qword ptr _t_conv16_16"	\
	"fstp	qword ptr _t_fxfl"	\
	"mov	eax,_t_fxfl"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Integer-only variant of FTOX16. Same bit manipulation as ICHOP2 (mantissa
// with implicit one in EAX, biased exponent in EBX, sign mask in EDX), but
// the shift counts come from f16tab instead of chptab.
// Requires initFPU() to have filled f16tab.
// This routine executes in approximately 21 cycles
int	FTOX162( float Val );
#pragma aux FTOX162 =			\
	"mov	ebx,eax"		\
	"and	eax,0x7fffff"		\
	"mov	edx,ebx"		\
	"and	ebx,0x7f800000"		\
	"shr	ebx,23" 		\
	"or	eax,0x800000"		\
	"sar	edx,31"	 		\
	"mov	cl,f16tab[ebx*2+1]"	\
	"sar	eax,cl"			\
	"mov	cl,f16tab[ebx*2+0]"	\
	"sal	eax,cl"			\
	"xor	eax,edx" 		\
	"sub	eax,edx"		\
	value [eax]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax ebx ecx edx];
  // ----------------------------------------------------------------------------
// Float to fixed point, documented as `(int) (value * 0x100)`: adds the
// double stored in _t_conv24_08, writes the sum as a 64-bit double to _t_fxfl
// and returns its low dword (_t_fxfl[0]). No control word is loaded here, so
// rounding follows whichever mode the FPU is currently in.
// This routine executes in approximately 10 cycles
int	FTOX8( float Val );
#pragma aux FTOX8 =			\
	"fadd	qword ptr _t_conv24_08"	\
	"fstp	qword ptr _t_fxfl"	\
	"mov	eax,_t_fxfl"		\
	value [eax]			\
	parm nomemory caller [8087]	\
	modify nomemory exact [eax 8087];
  // ----------------------------------------------------------------------------
// Integer-only variant of FTOX8. Same bit manipulation as ICHOP2 (mantissa
// with implicit one in EAX, biased exponent in EBX, sign mask in EDX), but
// the shift counts come from fx8tab instead of chptab.
// Requires initFPU() to have filled fx8tab.
// This routine executes in approximately 21 cycles
int	FTOX82( float Val );
#pragma aux FTOX82 =			\
	"mov	ebx,eax"		\
	"and	eax,0x7fffff"		\
	"mov	edx,ebx"		\
	"and	ebx,0x7f800000"		\
	"shr	ebx,23" 		\
	"or	eax,0x800000"		\
	"sar	edx,31"	 		\
	"mov	cl,fx8tab[ebx*2+1]"	\
	"sar	eax,cl"			\
	"mov	cl,fx8tab[ebx*2+0]"	\
	"sal	eax,cl"			\
	"xor	eax,edx" 		\
	"sub	eax,edx"		\
	value [eax]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax ebx ecx edx];
  // ----------------------------------------------------------------------------
// Sign test on the raw bits of the float passed in EAX: shifts the sign bit
// (bit 31) down to bit 0 and returns it in AL. Because only the sign bit is
// examined, -0.0f (bit pattern 0x80000000) reports true, unlike
// `value < 0.0f`.
bool	IS_NEG( float Val );
#pragma aux IS_NEG =			\
	"shr	eax,31"			\
	value [al]			\
	parm nomemory caller [eax]	\
	modify nomemory exact [eax];
  // ----------------------------------------------------------------------------
// Compares two floats as integers: Val1 arrives in EAX and Val2 in EBX, the
// raw bit patterns are subtracted (Val1 - Val2) and the sign bit of the
// difference is returned in AL, i.e. true when Val1's bits are below Val2's.
// The accompanying test only feeds it non-negative values.
bool	IS_LESS( float Val1, float Val2 );
#pragma aux IS_LESS =			\
	"sub	eax,ebx"		\
	"shr	eax,31"			\
	value [al]			\
	parm nomemory caller [eax] [ebx]\
	modify nomemory exact [eax];
  // ----------------------------------------------------------------------------
// Integer-compare replacement for `value1 <= value2`. Val1 arrives in EBX and
// Val2 in EAX; the raw bit patterns are subtracted (Val2 - Val1), the result
// is inverted and its sign bit returned in AL. The inverted sign bit is set
// exactly when Val2 - Val1 >= 0, so equal operands now report true (the
// previous sequence was identical to IS_LESS and returned false for them).
// As with the other integer comparisons, this is only meaningful for
// non-negative inputs; the accompanying test only feeds it such values.
bool	IS_LEQUAL( float Val1, float Val2 );
#pragma aux IS_LEQUAL =			\
	"sub	eax,ebx"		\
	"not	eax"			\
	"shr	eax,31"			\
	value [al]			\
	parm nomemory caller [ebx] [eax]\
	modify nomemory exact [eax];
  // ----------------------------------------------------------------------------
// Compares two floats as integers: Val1 arrives in EBX and Val2 in EAX (the
// register order is swapped relative to IS_LESS), the raw bit patterns are
// subtracted (Val2 - Val1) and the sign bit of the difference is returned in
// AL, i.e. true when Val1's bits are above Val2's.
// The accompanying test only feeds it non-negative values.
bool	IS_GREATER( float Val1, float Val2 );
#pragma aux IS_GREATER =		\
	"sub	eax,ebx"		\
	"shr	eax,31"			\
	value [al]			\
	parm nomemory caller [ebx] [eax]\
	modify nomemory exact [eax];
  // ----------------------------------------------------------------------------
// Integer-compare replacement for `value1 >= value2`. Val1 arrives in EAX and
// Val2 in EBX; the raw bit patterns are subtracted (Val1 - Val2), the result
// is inverted and its sign bit returned in AL. The inverted sign bit is set
// exactly when Val1 - Val2 >= 0, so equal operands now report true (the
// previous sequence was identical to IS_GREATER and returned false for them).
// As with the other integer comparisons, this is only meaningful for
// non-negative inputs; the accompanying test only feeds it such values.
bool	IS_GEQUAL( float Val1, float Val2 );
#pragma aux IS_GEQUAL =			\
	"sub	eax,ebx"		\
	"not	eax"			\
	"shr	eax,31"			\
	value [al]			\
	parm nomemory caller [eax] [ebx]\
	modify nomemory exact [eax];
  // ----------------------------------------------------------------------------
// Stores EAX to writeValue and immediately loads writeValue back into EBX.
// It takes no arguments, so whatever EAX happens to hold is what is stored.
// NOTE(review): presumably a probe for measuring store-then-load cost; its
// callers are not visible in this part of the file.
void	writeRead();
#pragma aux writeRead =			\
	"mov	writeValue,eax"		\
	"mov	ebx,writeValue"		\
	modify nomemory exact [eax ebx];
  // ----------------------------------------------------------------------------
// Loads writeValue twice in a row, first into EAX and then into EBX.
// NOTE(review): presumably the load-then-load counterpart of writeRead() for
// timing comparisons; its callers are not visible in this part of the file.
void   	readRead();
#pragma aux readRead =			\
	"mov	eax,writeValue"		\
	"mov	ebx,writeValue"		\
	modify nomemory exact [eax ebx];
  // ----------------------------------------------------------------------------
// Issues five consecutive stores of EAX to writeValue.
// NOTE(review): the name suggests it is meant to flush pending writes before
// a measurement; its callers are not visible in this part of the file.
void   	flushWriteBack();
#pragma aux flushWriteBack =		\
	"mov	writeValue,eax"		\
	"mov	writeValue,eax"		\
	"mov	writeValue,eax"		\
	"mov	writeValue,eax"		\
	"mov	writeValue,eax"		\
	modify nomemory;
  // ----------------------------------------------------------------------------
// Produces one pseudo-random float for the test routines. Five rand() draws
// are combined as (a - b) * (c - d) / divisor, which gives values of either
// sign. The draws happen in a fixed order and the divisor is redrawn until
// it is non-zero.
float	makeTestValue()
{
	const float	a = rand();
	const float	b = rand();
	const float	c = rand();
	const float	d = rand();

	// A zero divisor would blow up the formula, so keep drawing
	float	divisor = rand();
	while (divisor == 0.0f)
	{
		divisor = rand();
	}

	return (a - b) * (c - d) / divisor;
}
  // ----------------------------------------------------------------------------
// Limits a shift count to the range [-31, 31].
static int	clampShift(int shift)
{
	if (shift > 31)		return 31;
	if (shift < -31)	return -31;
	return shift;
}

// Records the shift for exponent `e` in a two-bytes-per-exponent table:
// a negative count goes (negated) into slot e*2+0, any other count into slot
// e*2+1. The slot that is not written keeps whatever it held before.
static void	storeShiftPair(unsigned char *table, int e, int shift)
{
	shift = clampShift(shift);
	if (shift < 0)	table[e*2+0] = (unsigned char) -shift;
	else		table[e*2+1] = (unsigned char)  shift;
}

// One-time setup for the fast float routines: fills the per-exponent shift
// tables (chptab, f24tab, f16tab, fx8tab) and the fraction-mask table
// (msktab), then calls FINIT() to prepare the FPU control words.
//
// The tables are small and, per the original author's note, only about 40
// bytes near the middle of each are in regular use, so cache pressure is
// expected to stay low.
void	initFPU()
{
	for (int e = 0; e < 256; e++)
	{
		// Shift needed for exponent e with no scaling
		const int	base = 150 - e;

		storeShiftPair(chptab, e, base);
		storeShiftPair(f24tab, e, base - 24);
		storeShiftPair(f16tab, e, base - 16);
		storeShiftPair(fx8tab, e, base - 8);

		// Mask covering the low (bits + 1) mantissa bits, or empty when
		// the count is negative
		const int	bits = clampShift(base - 1);
		msktab[e] = (bits < 0) ? 0 : (unsigned int) 0xffffffff >> (31 - bits);
	}

	// Finally, init the FPU routines
	FINIT();
}
  // ----------------------------------------------------------------------------
void	testICHOP(const int count)
{
	printf( "   Testing ICHOP...." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = ICHOP(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) testValue;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testICHOP2(const int count)
{
	printf( "   Testing ICHOP2..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = ICHOP2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) testValue;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFCHOP(const int count)
{
	printf( "   Testing FCHOP...." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FCHOP(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (int) ((float) testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFCHOP2(const int count)
{
	printf( "   Testing FCHOP2..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FCHOP2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (int) ((float) testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIFLOOR(const int count)
{
	printf( "   Testing IFLOOR..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = IFLOOR(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) ((float) floor(testValue));
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIFLOOR2(const int count)
{
	printf( "   Testing IFLOOR2.." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = IFLOOR2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) ((float) floor(testValue));
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIFLOOR3(const int count)
{
	printf( "   Testing IFLOOR3.." );
  	int	expected = 0;
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = IFLOOR3(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) ((float) floor(testValue));
		slowTime += stopTimer() - timerAdjust;
  		// Expect -0.0f to return different results
		if ((*(unsigned int *) &testValue) == 0x80000000) expected++;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", expected == diffCount ? "OK":"ERROR!", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFFLOOR(const int count)
{
	printf( "   Testing FFLOOR..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FFLOOR(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (float) floor(testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFFLOOR2(const int count)
{
	printf( "   Testing FFLOOR2.." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FFLOOR2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (float) floor(testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testICEIL(const int count)
{
	printf( "   Testing ICEIL...." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = ICEIL(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) ((float) ceil(testValue));
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testICEIL2(const int count)
{
	printf( "   Testing ICEIL2..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		int	test = ICEIL2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) ((float) ceil(testValue));
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFCEIL(const int count)
{
	printf( "   Testing FCEIL...." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FCEIL(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (float) ceil(testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFCEIL2(const int count)
{
	printf( "   Testing FCEIL2..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FCEIL2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (float) ceil(testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFFRAC(const int count)
{
	printf( "   Testing FFRAC...." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FFRAC(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = testValue - (float) floor((double) testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFFRAC2(const int count)
{
	printf( "   Testing FFRAC2..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = FFRAC2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = testValue - (float) floor((double) testValue);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testSUB_PIX(const int count)
{
	printf( "   Testing SUB_PIX.." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = SUB_PIX(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (float) ceil((double) testValue) - testValue;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testSUB_PIX2(const int count)
{
	printf( "   Testing SUB_PIX2." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		float	test = SUB_PIX2(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		float	real = (float) ceil((double) testValue) - testValue;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIS_NEG(const int count)
{
	printf( "   Testing IS_NEG..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue = makeTestValue();
  		startTimer();
		bool	test = IS_NEG(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		bool	real = testValue < 0.0f;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real && *(int *) &testValue != 0x80000000)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIS_LESS(const int count)
{
	printf( "   Testing IS_LESS.." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue1 = makeTestValue();
		float	testValue2 = makeTestValue();
  		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
  		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;
  		startTimer();
		bool	test = IS_LESS(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		bool	real = testValue1 < testValue2;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIS_LEQUAL(const int count)
{
	printf( "   Testing IS_LEQ..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue1 = makeTestValue();
		float	testValue2 = makeTestValue();
  		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
  		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;
  		startTimer();
		bool	test = IS_LEQUAL(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		bool	real = testValue1 <= testValue2;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIS_GREATER(const int count)
{
	printf( "   Testing IS_GREAT." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue1 = makeTestValue();
		float	testValue2 = makeTestValue();
  		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
  		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;
  		startTimer();
		bool	test = IS_GREATER(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		bool	real = testValue1 > testValue2;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testIS_GEQUAL(const int count)
{
	printf( "   Testing IS_GEQ..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		float	testValue1 = makeTestValue();
		float	testValue2 = makeTestValue();
  		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
  		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;
  		startTimer();
		bool	test = IS_GEQUAL(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		bool	real = testValue1 >= testValue2;
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFTOX24(const int count)
{
	printf( "   Testing FTOX24..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		enum	{MAX_TESTVAL = RAND_MAX * RAND_MAX};
		float	testValue = (makeTestValue() / (float) MAX_TESTVAL) * 128.0f;
  		startTimer();
		int	test = FTOX24(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) (testValue * (float) 0x1000000);
		slowTime += stopTimer() - timerAdjust;
  		if (ABS(test - real) > 1)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFTOX242(const int count)
{
	printf( "   Testing FTOX242.." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		enum	{MAX_TESTVAL = RAND_MAX * RAND_MAX};
		float	testValue = (makeTestValue() / (float) MAX_TESTVAL) * 128.0f;
  		startTimer();
		int	test = FTOX242(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) (testValue * (float) 0x1000000);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFTOX16(const int count)
{
	printf( "   Testing FTOX16..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		enum	{MAX_TESTVAL = RAND_MAX * RAND_MAX};
		float	testValue = (makeTestValue() / (float) MAX_TESTVAL) * 32767.0f;
  		startTimer();
		int	test = FTOX16(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) (testValue * (float) 0x10000);
		slowTime += stopTimer() - timerAdjust;
  		if (ABS(test - real) > 1)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFTOX162(const int count)
{
	printf( "   Testing FTOX162.." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		enum	{MAX_TESTVAL = RAND_MAX * RAND_MAX};
		float	testValue = (makeTestValue() / (float) MAX_TESTVAL) * 32767.0f;
  		startTimer();
		int	test = FTOX162(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) (testValue * (float) 0x10000);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFTOX8(const int count)
{
	printf( "   Testing FTOX8...." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		enum	{MAX_TESTVAL = RAND_MAX * RAND_MAX};
		float	testValue = (makeTestValue() / (float) MAX_TESTVAL) * 8000000.0f;
  		startTimer();
		int	test = FTOX8(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) (testValue * (float) 0x100);
		slowTime += stopTimer() - timerAdjust;
  		if (ABS(test - real) > 1)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testFTOX82(const int count)
{
	printf( "   Testing FTOX82..." );
  	unsigned int	fastTime = 0, slowTime = 0, diffCount = 0;
  	for (int i = 0; i < count; i++)
	{
		enum	{MAX_TESTVAL = RAND_MAX * RAND_MAX};
		float	testValue = (makeTestValue() / (float) MAX_TESTVAL) * 8000000.0f;
  		startTimer();
		int	test = FTOX82(testValue);
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		int	real = (int) (testValue * (float) 0x100);
		slowTime += stopTimer() - timerAdjust;
  		if (test != real)	diffCount++;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
  	printf( "Done.  %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
  // ----------------------------------------------------------------------------
void	testMemSpeed(const int count)
{
	printf( "   Testing mem spd.." );
  	unsigned int	fastTime = 0, slowTime = 0, flushTime = 0;
  	for (int i = 0; i < count; i++)
	{
		startTimer();
		readRead();
		fastTime += stopTimer() - timerAdjust;
  		startTimer();
		writeRead();
		slowTime += stopTimer() - timerAdjust;
  		startTimer();
		flushWriteBack();
		flushTime += stopTimer() - timerAdjust;
	}
  	fastTime = (int) ((float) fastTime / (float) count);
	slowTime = (int) ((float) slowTime / (float) count);
	flushTime = (int) ((float) flushTime / (float) count);
  	printf( "Done.  r/r ticks: %d w/r ticks: %d flush: %d\n", fastTime, slowTime, flushTime);
}
  // ----------------------------------------------------------------------------
// Program entry point.  Initialises the FPU helpers, measures the overhead of
// a startTimer/stopTimer pair into timerAdjust, seeds the random number
// generator and then runs every test routine in turn.
//
//	argc, argv	argv[1], when present, is the number of iterations per
//			test.  It must parse as a positive integer that fits in
//			an int; otherwise the default of 1000000 is kept, since
//			the tests divide by this count.
//
// Returns 0 once all tests have run.
int	main(int argc, char *argv[])
{
	// Unbuffered stdout so progress text shows up before each test finishes
	setbuf(stdout, NULL);

	// Warn the user about the /op option
#ifndef __SW_OP
	printf( "---------------------------------------------------------\n" );
	printf( "\aThis program was not compiled with /op.  You can expect\n" );
	printf( "problems since Watcom is less accurate without /op.\n      " );
	printf( "---------------------------------------------------------\n" );
	printf( "\n" );
#endif

	// Init the floating point routines
	initFPU();

	// Calculate the timer routine overhead
	printf( "Syncronizing the timer..." );

	int	timerTestCount = 1000000;

	for (int i = 0; i < timerTestCount; i++)
	{
		startTimer();
		timerAdjust += stopTimer();
	}

	timerAdjust = (int) ((float) timerAdjust / (float) timerTestCount);

	// timerAdjust is unsigned, so print it with %u
	printf( "Done.  Timer adjustment is %u cycles.\n", timerAdjust );

	// Prime the random number generator
	srand((unsigned int) time(NULL));

	// Get the run count
	int	count = 1000000;

	if (argc > 1)
	{
		// strtol (unlike atoi) has defined behaviour on out-of-range input
		long	requested = strtol(argv[1], NULL, 10);

		// A zero count would make every test divide by zero when averaging,
		// so only accept positive values that survive the narrowing to int
		if (requested > 0 && (long) (int) requested == requested)
			count = (int) requested;
		else
			printf( "Ignoring invalid iteration count \"%s\"; using %d.\n", argv[1], count );
	}

	// Start testing
	printf( "Testing each routine with %d iterations.\n", count );

	printf( "\nFastest routines:\n" );

	testICHOP(count);
	testFCHOP(count);
	testIFLOOR(count);
	testFFLOOR(count);
	testICEIL(count);
	testFCEIL(count);
	testFFRAC(count);
	testSUB_PIX(count);
	testIS_NEG(count);
	testIS_LESS(count);
	testIS_LEQUAL(count);
	testIS_GREATER(count);
	testIS_GEQUAL(count);
	testFTOX24(count);
	testFTOX16(count);
	testFTOX8(count);

	printf( "\nMedium-speed routines:\n" );

	testICHOP2(count);
	testFCHOP2(count);
	testIFLOOR2(count);
	testFFLOOR2(count);
	testICEIL2(count);
	testFCEIL2(count);
	testFFRAC2(count);
	testSUB_PIX2(count);
	testFTOX242(count);
	testFTOX162(count);
	testFTOX82(count);

	printf( "\nSlowest routines:\n" );

	testIFLOOR3(count);

	printf( "\nTesting memory:\n" );

	testMemSpeed(count);

	return 0;
}
  // ----------------------------------------------------------------------------
   |