diff --git a/GLideNHQ/CMakeLists.txt b/GLideNHQ/CMakeLists.txt
new file mode 100644
index 00000000..f51f5b9c
--- /dev/null
+++ b/GLideNHQ/CMakeLists.txt
@@ -0,0 +1,87 @@
+cmake_minimum_required( VERSION 2.6 )
+# Texture enhancement library; the static library target is declared below.
+project(GLideNHQ)
+# Translation units compiled into the library.
+set( GLideNHQ_SOURCES
+    Ext_TxFilter.cpp
+    TextureFilters.cpp
+    TextureFilters_2xsai.cpp
+    TextureFilters_hq2x.cpp
+    TextureFilters_hq4x.cpp
+    TxCache.cpp
+    TxDbg.cpp
+    TxFilter.cpp
+    TxFilterExport.cpp
+    TxHiResCache.cpp
+    TxImage.cpp
+    TxQuantize.cpp
+    TxReSample.cpp
+    TxTexCache.cpp
+    TxUtil.cpp
+    )
+
+# Bundled headers; include_directories resolves inc/ against this source directory.
+include_directories( inc )
+
+set(Boost_USE_STATIC_LIBS   ON)
+set(Boost_USE_MULTITHREADED ON)
+find_package(Boost COMPONENTS thread filesystem system REQUIRED)
+include_directories(${Boost_INCLUDE_DIRS})
+
+if(WIN32)
+  # Absolute path: with cmake_minimum_required(2.6) a relative link directory
+  # is handed to the linker unchanged, i.e. relative to the build directory.
+  link_directories( "${CMAKE_CURRENT_SOURCE_DIR}/lib" )
+  add_definitions( -DWIN32 -D_CRT_SECURE_NO_WARNINGS )
+endif(WIN32)
+
+# CMAKE_COMPILER_IS_GNUCXX is the documented "compiler is GCC" variable;
+# __COMPILER_GNU is an internal include guard that is not guaranteed to be set.
+if (WIN32 AND CMAKE_COMPILER_IS_GNUCXX)
+   add_definitions(-DBOOST_THREAD_USE_LIB)  # mingw-gcc fails to link boost::thread
+endif (WIN32 AND CMAKE_COMPILER_IS_GNUCXX)
+
+# Build type
+# Fall back to Release when no build type was requested.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+# Debug configuration: the decision is taken once, at configure time.
+# DEBUG_BUILD flags it for CMake logic, -DDEBUG for the compiled sources.
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  set(CMAKE_BUILD_TYPE Debug)
+  set(DEBUG_BUILD TRUE)
+  add_definitions(-DDEBUG)
+endif()
+
+find_package(OpenGL REQUIRED)
+# FindOpenGL sets OPENGL_INCLUDE_DIR; the OpenGL_* names used here before are
+# never defined by the module, so those calls silently expanded to nothing.
+include_directories(${OPENGL_INCLUDE_DIR})
+if(NOT OPENGL_FOUND)
+	message(FATAL_ERROR "OPENGL not found!")  # "ERROR" is not a message mode
+endif(NOT OPENGL_FOUND)
+
+if(NOT MSVC)  # GCC-style switches: MSVC does not understand any of them
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -static-libgcc -static-libstdc++")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
+endif()
+
+add_library(GLideNHQ STATIC ${GLideNHQ_SOURCES})
+
+# Link as C++ and give the output file an empty name prefix.
+# NOTE(review): LIBRARY_OUTPUT_DIRECTORY does not apply to STATIC libraries
+# (those follow ARCHIVE_OUTPUT_DIRECTORY) - confirm whether plugin/ is intended.
+set_target_properties(GLideNHQ PROPERTIES
+  LINKER_LANGUAGE CXX
+  PREFIX ""
+  LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/plugin)
+
+# libpngd/zlibd are the debug builds of the bundled libraries (Makefile.vc8 links
+# them only when DEBUG is set); every other build type gets libpng/zlib.
+if( CMAKE_BUILD_TYPE STREQUAL "Debug")
+target_link_libraries(GLideNHQ ${OPENGL_LIBRARIES} ${Boost_LIBRARIES} libpngd zlibd )
+else( CMAKE_BUILD_TYPE STREQUAL "Debug")
+target_link_libraries(GLideNHQ ${OPENGL_LIBRARIES} ${Boost_LIBRARIES} libpng zlib )
+endif( CMAKE_BUILD_TYPE STREQUAL "Debug")
diff --git a/GLideNHQ/Ext_TxFilter.cpp b/GLideNHQ/Ext_TxFilter.cpp
new file mode 100644
index 00000000..171e03b8
--- /dev/null
+++ b/GLideNHQ/Ext_TxFilter.cpp
@@ -0,0 +1,172 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <memory.h>
+#include <stdlib.h>
+#include "Ext_TxFilter.h"
+
+typedef boolean (*txfilter_init)(int maxwidth, int maxheight, int maxbpp,
+                                 int options, int cachesize,
+                                 wchar_t *path, wchar_t *ident,
+                                 dispInfoFuncExt callback);
+
+typedef void (*txfilter_shutdown)(void);
+
+typedef boolean (*txfilter_filter)(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat,
+                                   uint64 g64crc, GHQTexInfo *info);
+
+typedef boolean (*txfilter_hirestex)(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info);
+
+typedef uint64 (*txfilter_checksum)(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette);
+
+typedef boolean (*txfilter_dmptx)(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64);
+
+typedef boolean (*txfilter_reloadhirestex)();
+
+static struct { /* handle and entry points of the dynamically loaded library */
+  TXHMODULE lib; /* handle returned by DLOPEN; null while nothing is loaded */
+  txfilter_init init; /* symbol "txfilter_init" */
+  txfilter_shutdown shutdown; /* symbol "txfilter_shutdown" */
+  txfilter_filter filter; /* symbol "txfilter" */
+  txfilter_hirestex hirestex; /* symbol "txfilter_hirestex" */
+  txfilter_checksum checksum; /* symbol "txfilter_checksum" */
+  txfilter_dmptx dmptx; /* symbol "txfilter_dmptx" (not required by ext_ghq_init) */
+  txfilter_reloadhirestex reloadhirestex; /* symbol "txfilter_reloadhirestex" (not required by ext_ghq_init) */
+} txfilter; /* static storage: every member starts out null */
+
+/* Run the shutdown hook if resolved; if a library is loaded, unload it and zero the table. */
+void ext_ghq_shutdown(void)
+{
+  if (txfilter.shutdown != NULL)
+    txfilter.shutdown();
+  if (txfilter.lib == NULL)
+    return;
+  DLCLOSE(txfilter.lib);
+  memset(&txfilter, 0, sizeof(txfilter));
+}
+
+/* Loads the GlideHQ library from `path` on first use, resolves its entry
+ * points and forwards the arguments to txfilter_init. Returns that call's
+ * result, or 0 if the library or a mandatory entry point is unavailable. */
+boolean ext_ghq_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize,
+                     wchar_t *path, wchar_t *ident,
+                     dispInfoFuncExt callback)
+{
+  boolean bRet = 0;
+  /* curpath is filled with unbounded wcscpy/wcscat: only try to load when
+   * path plus the longest suffix below (12 characters) and the NUL fit. */
+  if (!txfilter.lib && path && wcslen(path) + 12 < MAX_PATH) {
+    wchar_t curpath[MAX_PATH];
+    wcscpy(curpath, path);
+#ifdef WIN32
+#ifdef GHQCHK
+    wcscat(curpath, L"\\ghqchk.dll");
+#else
+    wcscat(curpath, L"\\GlideHQ.dll");
+#endif
+    txfilter.lib = DLOPEN(curpath);
+#else
+    char cbuf[MAX_PATH];
+#ifdef GHQCHK
+    wcscat(curpath, L"/ghqchk.so");
+#else
+    wcscat(curpath, L"/GlideHQ.so");
+#endif
+    /* wcstombs fails with (size_t)-1 and leaves cbuf unterminated when full. */
+    size_t len = wcstombs(cbuf, curpath, MAX_PATH);
+    if (len != (size_t)-1 && len < MAX_PATH)
+      txfilter.lib = DLOPEN(cbuf);
+#endif
+  }
+
+  if (txfilter.lib) {
+    if (!txfilter.init) txfilter.init = (txfilter_init)DLSYM(txfilter.lib, "txfilter_init");
+    if (!txfilter.shutdown) txfilter.shutdown = (txfilter_shutdown)DLSYM(txfilter.lib, "txfilter_shutdown");
+    if (!txfilter.filter) txfilter.filter = (txfilter_filter)DLSYM(txfilter.lib, "txfilter");
+    if (!txfilter.hirestex) txfilter.hirestex = (txfilter_hirestex)DLSYM(txfilter.lib, "txfilter_hirestex");
+    if (!txfilter.checksum) txfilter.checksum = (txfilter_checksum)DLSYM(txfilter.lib, "txfilter_checksum");
+    if (!txfilter.dmptx) txfilter.dmptx = (txfilter_dmptx)DLSYM(txfilter.lib, "txfilter_dmptx");
+    if (!txfilter.reloadhirestex) txfilter.reloadhirestex = (txfilter_reloadhirestex)DLSYM(txfilter.lib, "txfilter_reloadhirestex");
+  }
+
+  /* dmptx and reloadhirestex are optional; the other five are mandatory. */
+  if (txfilter.init && txfilter.shutdown && txfilter.filter &&
+      txfilter.hirestex && txfilter.checksum)
+    bRet = (*txfilter.init)(maxwidth, maxheight, maxbpp, options, cachesize, path, ident, callback);
+  else
+    ext_ghq_shutdown();
+
+  return bRet;
+}
+
+/* Forwards to the library's "txfilter" entry point; returns 0 when that
+ * entry point has not been resolved. */
+boolean ext_ghq_txfilter(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat,
+                                uint64 g64crc, GHQTexInfo *info)
+{
+  if (!txfilter.filter)
+    return 0;
+
+  return txfilter.filter(src, srcwidth, srcheight, srcformat,
+                         g64crc, info);
+}
+
+/* Forwards to the library's "txfilter_hirestex" entry point; returns 0 when
+ * that entry point has not been resolved. */
+boolean ext_ghq_hirestex(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info)
+{
+  if (!txfilter.hirestex)
+    return 0;
+
+  return txfilter.hirestex(g64crc, r_crc64, palette, info);
+}
+
+/* Forwards to the library's "txfilter_checksum" entry point; returns 0 when
+ * that entry point has not been resolved. */
+uint64 ext_ghq_checksum(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette)
+{
+  if (!txfilter.checksum)
+    return 0;
+
+  return txfilter.checksum(src, width, height, size, rowStride, palette);
+}
+
+/* Forwards to the library's "txfilter_dmptx" entry point; returns 0 when
+ * that entry point has not been resolved. */
+boolean ext_ghq_dmptx(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64)
+{
+  if (!txfilter.dmptx)
+    return 0;
+
+  return txfilter.dmptx(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
+}
+
+/* Forwards to the library's "txfilter_reloadhirestex" entry point; returns 0
+ * when that entry point has not been resolved. */
+boolean ext_ghq_reloadhirestex()
+{
+  if (!txfilter.reloadhirestex)
+    return 0;
+
+  return txfilter.reloadhirestex();
+}
diff --git a/GLideNHQ/Ext_TxFilter.h b/GLideNHQ/Ext_TxFilter.h
new file mode 100644
index 00000000..2ea585a8
--- /dev/null
+++ b/GLideNHQ/Ext_TxFilter.h
@@ -0,0 +1,211 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __EXT_TXFILTER_H__
+#define __EXT_TXFILTER_H__
+
+#ifdef WIN32 /* Win32 API flavour of the loader/directory helpers */
+#include <windows.h>
+#define TXHMODULE HMODULE
+#define DLOPEN(a) LoadLibraryW(a) /* a: wide-character path */
+#define DLCLOSE(a) FreeLibrary(a)
+#define DLSYM(a, b) GetProcAddress(a, b)
+#define GETCWD(a, b) GetCurrentDirectoryW(a, b) /* a: buffer length, b: buffer */
+#define CHDIR(a) SetCurrentDirectoryW(a)
+#else /* dlopen-based flavour */
+#include <iostream>
+#include <dlfcn.h> 
+#define MAX_PATH 4095 /* stand-in for the MAX_PATH the WIN32 branch gets from <windows.h> */
+#define TXHMODULE void*
+#define DLOPEN(a) dlopen(a, RTLD_LAZY|RTLD_GLOBAL) /* a: narrow (char) path */
+#define DLCLOSE(a) dlclose(a)
+#define DLSYM(a, b) dlsym(a, b)
+#define GETCWD(a, b) getcwd(b, a) /* keeps the (length, buffer) argument order of the WIN32 macro */
+#define CHDIR(a) chdir(a) /* NOTE(review): getcwd/chdir are declared in <unistd.h>, which is not included here */
+#endif
+
+#ifdef __MSC__ /* defined on the compiler command line by Makefile.vc8 */
+typedef __int64 int64;
+typedef unsigned __int64 uint64;
+#else
+typedef long long int64;
+typedef unsigned long long uint64;
+typedef unsigned char boolean; /* NOTE(review): the __MSC__ branch defines no boolean here - presumably taken from <windows.h>; confirm */
+#endif
+
+#define NO_OPTIONS          0x00000000
+
+#define FILTER_MASK         0x000000ff
+#define NO_FILTER           0x00000000
+#define SMOOTH_FILTER_MASK  0x0000000f
+#define NO_SMOOTH_FILTER    0x00000000
+#define SMOOTH_FILTER_1     0x00000001
+#define SMOOTH_FILTER_2     0x00000002
+#define SMOOTH_FILTER_3     0x00000003
+#define SMOOTH_FILTER_4     0x00000004
+#define SHARP_FILTER_MASK   0x000000f0
+#define NO_SHARP_FILTER     0x00000000
+#define SHARP_FILTER_1      0x00000010
+#define SHARP_FILTER_2      0x00000020
+
+#define ENHANCEMENT_MASK    0x00000f00
+#define NO_ENHANCEMENT      0x00000000
+#define X2_ENHANCEMENT      0x00000100
+#define X2SAI_ENHANCEMENT   0x00000200
+#define HQ2X_ENHANCEMENT    0x00000300
+#define LQ2X_ENHANCEMENT    0x00000400
+#define HQ4X_ENHANCEMENT    0x00000500
+#define HQ2XS_ENHANCEMENT   0x00000600
+#define LQ2XS_ENHANCEMENT   0x00000700
+
+#define COMPRESSION_MASK    0x0000f000
+#define NO_COMPRESSION      0x00000000
+#define FXT1_COMPRESSION    0x00001000
+#define NCC_COMPRESSION     0x00002000
+#define S3TC_COMPRESSION    0x00003000
+
+#define HIRESTEXTURES_MASK  0x000f0000
+#define NO_HIRESTEXTURES    0x00000000
+#define GHQ_HIRESTEXTURES   0x00010000
+#define RICE_HIRESTEXTURES  0x00020000
+#define JABO_HIRESTEXTURES  0x00030000
+
+#define COMPRESS_TEX        0x00100000
+#define COMPRESS_HIRESTEX   0x00200000
+#define GZ_TEXCACHE         0x00400000
+#define GZ_HIRESTEXCACHE    0x00800000
+#define DUMP_TEXCACHE       0x01000000
+#define DUMP_HIRESTEXCACHE  0x02000000
+#define TILE_HIRESTEX       0x04000000
+#define UNDEFINED_0         0x08000000
+#define FORCE16BPP_HIRESTEX 0x10000000
+#define FORCE16BPP_TEX      0x20000000
+#define LET_TEXARTISTS_FLY  0x40000000 /* a little freedom for texture artists */
+#define DUMP_TEX            0x80000000
+
+#ifndef __GLIDE_H__ /* GLIDE3 */
+/* from 3Dfx Interactive Inc. glide.h */
+#define GR_TEXFMT_ALPHA_8           0x2
+#define GR_TEXFMT_INTENSITY_8       0x3
+
+#define GR_TEXFMT_ALPHA_INTENSITY_44 0x4
+#define GR_TEXFMT_P_8                0x5
+
+#define GR_TEXFMT_RGB_565            0xa
+#define GR_TEXFMT_ARGB_1555          0xb
+#define GR_TEXFMT_ARGB_4444          0xc
+#define GR_TEXFMT_ALPHA_INTENSITY_88 0xd
+
+/* from 3Dfx Interactive Inc. g3ext.h */
+#define GR_TEXFMT_ARGB_CMP_FXT1      0x11
+
+#define GR_TEXFMT_ARGB_8888          0x12
+
+#define GR_TEXFMT_ARGB_CMP_DXT1      0x16
+#define GR_TEXFMT_ARGB_CMP_DXT3      0x18
+#define GR_TEXFMT_ARGB_CMP_DXT5      0x1A
+#endif /* GLIDE3 */
+
+struct GHQTexInfo { /* texture description; the "output" argument of ext_ghq_txfilter and ext_ghq_hirestex */
+  unsigned char *data; /* texture bytes - NOTE(review): ownership/lifetime not visible in this header */
+  int width;
+  int height;
+  unsigned short format; /* presumably one of the GR_TEXFMT_* codes defined above - confirm */
+
+  int smallLodLog2;
+  int largeLodLog2;
+  int aspectRatioLog2;
+
+  int tiles;
+  int untiled_width;
+  int untiled_height;
+
+  unsigned char is_hires_tex; /* presumably nonzero when the data comes from a hi-res pack - confirm */
+};
+
+/* Callback to display hires texture info.
+ * Gonetz <gonetz(at)ngs.ru>
+ *
+ * void DispInfo(const wchar_t *format, ...)
+ * {
+ *   va_list args;
+ *   wchar_t buf[INFO_BUF];
+ *
+ *   va_start(args, format);
+ *   vswprintf(buf, INFO_BUF, format, args);
+ *   va_end(args);
+ *
+ *   wprintf(L"%ls", buf);
+ * }
+ */
+#define INFO_BUF 4095
+typedef void (*dispInfoFuncExt)(const wchar_t *format, ...);
+
+#ifndef TXFILTER_DLL
+boolean ext_ghq_init(int maxwidth, /* maximum texture width supported by hardware */
+                     int maxheight,/* maximum texture height supported by hardware */
+                     int maxbpp,   /* maximum texture bpp supported by hardware */
+                     int options,  /* options */
+                     int cachesize,/* cache textures to system memory */
+                     wchar_t *path,   /* plugin directory. must be smaller than MAX_PATH */
+                     wchar_t *ident,  /* name of ROM. must be no longer than 64 in character. */
+                     dispInfoFuncExt callback /* callback function to display info */
+                     );
+
+void ext_ghq_shutdown(void);
+
+boolean ext_ghq_txfilter(unsigned char *src,        /* input texture */
+                         int srcwidth,              /* width of input texture */
+                         int srcheight,             /* height of input texture */
+                         unsigned short srcformat,  /* format of input texture */
+                         uint64 g64crc,             /* glide64 crc */
+                         GHQTexInfo *info           /* output */
+                         );
+
+boolean ext_ghq_hirestex(uint64 g64crc,             /* glide64 crc */
+                         uint64 r_crc64,            /* checksum hi:palette low:texture */
+                         unsigned short *palette,   /* palette for CI textures */
+                         GHQTexInfo *info           /* output */
+                         );
+
+uint64 ext_ghq_checksum(unsigned char *src, /* input texture */
+                        int width,          /* width of texture */
+                        int height,         /* height of texture */
+                        int size,           /* type of texture pixel */
+                        int rowStride,      /* row stride in bytes */
+                        unsigned char *palette /* palette */
+                        );
+
+boolean ext_ghq_dmptx(unsigned char *src,   /* input texture (must be in 3Dfx Glide format) */
+                      int width,            /* width of texture */
+                      int height,           /* height of texture */
+                      int rowStridePixel,   /* row stride of input texture in pixels */
+                      unsigned short gfmt,  /* glide format of input texture */
+                      unsigned short n64fmt,/* N64 format hi:format low:size */
+                      uint64 r_crc64        /* checksum hi:palette low:texture */
+                      );
+
+boolean ext_ghq_reloadhirestex();
+#endif /* TXFILTER_DLL */
+
+#endif /* __EXT_TXFILTER_H__ */
diff --git a/GLideNHQ/GlideHQ.rc b/GLideNHQ/GlideHQ.rc
new file mode 100644
index 00000000..9346c06e
--- /dev/null
+++ b/GLideNHQ/GlideHQ.rc
@@ -0,0 +1,79 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "bldno.h"
+
+#define ID(id)     id
+
+#define VS_FILE_INFO		ID(16)		/* Version stamp res type */
+#define VS_VERSION_INFO		ID(1)  		/* Version stamp res ID */
+#define VS_USER_DEFINED		ID(100)		/* User-defined res IDs */
+
+#define VOS_DOS_WINDOWS32       0x00010004L
+#define VFT_DLL                 0x00000002L
+#define VER_PRERELEASE              0
+#define VER_PRIVATEBUILD            0
+#define VER_DEBUG                   0
+
+#define VERSIONNAME             "GlideHQ.dll\0"
+#define PRODNAME                "GlideHQ\0"
+#define DESCRIPT                "Texture filtering DLL\0"
+#define COPYRIGHTSTR            "Copyright (C) 2007 Hiroshi Morii\0"
+#define CONTACTSTR              "Hiroshi Morii <koolsmoky@users.sourceforge.net> http://www.3dfxzone.it/koolsmoky\0"
+
+#define MANVERSION              1
+#define MANREVISION             02
+#define MINVERSION              00
+
+#define VERSIONSTR "1.02.00." BUILD_NUMBER_STR
+
+VS_VERSION_INFO VERSIONINFO /* version resource; BUILD_NUMBER comes from bldno.h */
+ FILEVERSION MANVERSION, MANREVISION, MINVERSION, BUILD_NUMBER /* MINVERSION replaces a literal 0 */
+ PRODUCTVERSION MANVERSION, MANREVISION, MINVERSION, BUILD_NUMBER
+ FILEFLAGSMASK 0x0030003FL
+ FILEFLAGS (VER_PRIVATEBUILD|VER_PRERELEASE|VER_DEBUG) /* all three are defined as 0 above */
+
+ FILEOS VOS_DOS_WINDOWS32
+ FILETYPE VFT_DLL
+ FILESUBTYPE 0
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904E4" /* language 0x0409, code page 0x04E4 (1252); matches "Translation" below */
+        BEGIN
+            VALUE "FileDescription", DESCRIPT
+            VALUE "FileVersion", VERSIONSTR
+            VALUE "InternalName", VERSIONNAME
+            VALUE "LegalCopyright", COPYRIGHTSTR
+            VALUE "OriginalFilename", VERSIONNAME
+            VALUE "ProductName", PRODNAME
+            VALUE "ProductVersion", VERSIONSTR
+            VALUE "Contact", CONTACTSTR
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        /* the following line should be extended for localized versions */
+        VALUE "Translation", 0x409, 1252
+    END
+END
diff --git a/GLideNHQ/Makefile.gcc b/GLideNHQ/Makefile.gcc
new file mode 100644
index 00000000..e4fa1aef
--- /dev/null
+++ b/GLideNHQ/Makefile.gcc
@@ -0,0 +1,126 @@
+# This MUST be processed by GNU make
+#
+# Texture Filtering Linux Makefile
+# Version:  1.0
+#
+# Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+# Email koolsmoky(at)users.sourceforge.net
+# Web   http://www.3dfxzone.it/koolsmoky
+#
+# this is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# this is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Make; see the file COPYING.  If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+#
+#  Available options:
+#
+#    Environment variables:
+#	DEBUG=1		enable debugging checks and messages
+#			default = no
+#
+#    Environment variables:
+#
+#    Targets:
+#	all:		build dynamic module
+#	clean:		remove object files
+#	realclean:	remove all generated files
+#
+#
+#  Requirements:
+#
+#    Compiler:
+#       GCC 4.2
+#
+#    Libraries:
+#       boost  1.41.0   (http://www.boost.org)  bjam --toolset=gcc link=static runtime-link=static variant=release,debug stage
+#       libpng 1.4.1    (http://www.libpng.org) static library
+#       zlib   1.2.3    (http://www.zlib.org)   static library
+#       tc     1.1+                             patched for ARGB input and ATI Radeon workaround
+# 
+
+#
+# GCC does not have SEH (structured exception handling)
+#
+
+.PHONY: all clean realclean
+
+DLLNAME = GlideHQ.so
+
+# libpng and zlib headers & libraries
+EXT_INC = ./inc
+EXT_LIB = ./lib
+
+# boost library & headers
+BOOST_INC = ../boost_1_41_0
+BOOST_LIB = ../boost_1_41_0/stage/lib
+
+CC = g++
+CFLAGS = -Wall -W -pedantic -ansi -Wno-long-long
+CFLAGS += -O3 -ffast-math -funroll-loops
+#CFLAGS += -fexpensive-optimizations -march=k6
+CFLAGS += -I. -I$(EXT_INC) -I$(BOOST_INC)
+CFLAGS += -fPIC -DPIC
+#CFLAGS += -DTXFILTER_DLL
+
+ifdef DEBUG
+CFLAGS += -DDEBUG
+endif
+
+LD = g++
+LDFLAGS = -shared  # libraries go in LDLIBS so they follow the objects on the link line
+LDLIBS = $(EXT_LIB)/libdxtn.a $(EXT_LIB)/libpng.a $(EXT_LIB)/libz.a $(BOOST_LIB)/libboost_filesystem-gcc42-mt-s.a $(BOOST_LIB)/libboost_system-gcc42-mt-s.a $(BOOST_LIB)/libboost_thread-gcc42-mt-s.a -lstdc++ -lpthread
+
+RM = rm
+
+SOURCES = \
+	TxFilterExport.cpp \
+	TxFilter.cpp \
+	TxCache.cpp \
+	TxTexCache.cpp \
+	TxHiResCache.cpp \
+	TxQuantize.cpp \
+	TxUtil.cpp \
+	TextureFilters.cpp \
+	TextureFilters_2xsai.cpp \
+	TextureFilters_hq2x.cpp \
+	TextureFilters_hq4x.cpp \
+	TxImage.cpp \
+	TxReSample.cpp \
+	TxDbg.cpp
+
+OBJECTS = $(SOURCES:.cpp=.o)
+
+.cpp.o:
+	$(CC) -o $@ $(CFLAGS) -c $<
+# Default goal: the shared module.
+all: $(DLLNAME)
+# Link every object, followed by the static libraries listed in LDLIBS.
+$(DLLNAME): $(OBJECTS)
+	$(LD) -o $@ $(LDFLAGS) $^ $(LDLIBS)
+# NOTE(review): each object depends on ALL sources, so any edit rebuilds everything.
+$(OBJECTS): $(SOURCES) bldno.h
+# bldno.h is the captured standard output of the bldno helper program.
+bldno.h: bldno.exe
+	./$< > $@
+# Helper program that prints the contents of bldno.h.
+bldno.exe: bldno.cpp
+	$(CC) -o $@ $(CFLAGS) $<
+# The leading '-' lets make continue when rm fails.
+clean:
+	-$(RM) *.o
+# realclean also removes the module and the generated bldno files.
+realclean: clean
+	-$(RM) $(DLLNAME) bldno.exe bldno.h
+
+-include depend
diff --git a/GLideNHQ/Makefile.vc8 b/GLideNHQ/Makefile.vc8
new file mode 100644
index 00000000..5fe03263
--- /dev/null
+++ b/GLideNHQ/Makefile.vc8
@@ -0,0 +1,144 @@
+# This MUST be processed by GNU make
+#
+# Texture Filtering MSVC Makefile
+# Version:  1.0
+#
+# Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+# Email koolsmoky(at)users.sourceforge.net
+# Web   http://www.3dfxzone.it/koolsmoky
+#
+# this is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# this is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Make; see the file COPYING.  If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+#
+#  Available options:
+#
+#    Environment variables:
+#	DEBUG=1		enable debugging checks and messages
+#			default = no
+#
+#    Targets:
+#	all:		build everything
+#	clean:		remove object files
+#	realclean:	remove all generated files
+#
+#
+#  Requirements:
+#
+#    Compiler:
+#       Microsoft VisualC++ 8.0 (VS2005)
+#
+#    Libraries:
+#       boost  1.41.0   (http://www.boost.org)  bjam --toolset=msvc-8.0 link=static runtime-link=static variant=release,debug stage
+#       libpng 1.4.1    (http://www.libpng.org) built with /MT or /MTd switch
+#       zlib   1.2.3    (http://www.zlib.org)   built with /MT or /MTd switch
+#       tc     1.1+                             patched for ARGB input and ATI Radeon workaround
+#
+#       All libraries must be built with /MT or /MTd switch and statically linked.
+#       If you need any help feel free to drop me a line.
+#
+
+.PHONY: all clean realclean
+.SUFFIXES: .cpp .obj .rc .res
+
+# RESNAME = txfilter.res
+# DLLNAME = txfilter.dll
+# LIBNAME = txfilter.lib
+# EXPNAME = txfilter.exp
+RESNAME = GlideHQ.res
+DLLNAME = GlideHQ.dll
+LIBNAME = GlideHQ.lib
+EXPNAME = GlideHQ.exp
+
+# libpng and zlib headers & libraries
+EXT_INC = ./inc
+EXT_LIB = ./lib
+
+# boost library & headers
+BOOST_INC = ../boost_1_41_0
+BOOST_LIB = ../boost_1_41_0/stage/lib
+
+CC = cl
+AS = nasm
+LD = _link # change this to suit your build environment
+RC = rc
+
+UNLINK = $(RM) $(1)
+
+LDFLAGS = -nologo -dll -opt:WIN98 -machine:X86 -nodefaultlib
+
+ASFLAGS = -O6 -fwin32 -D__WIN32__ --prefix _
+ASFLAGS += $(CDEFS)
+
+CFLAGS  = -nologo -W3 -WX -D__MSC__ -DWIN32 -D_WINDOWS -D_WINDLL -EHa -D_CRT_SECURE_NO_DEPRECATE -DTXFILTER_DLL
+
+LDLIBS = user32.lib kernel32.lib
+ifdef DEBUG
+CFLAGS += -Zi -DDEBUG -MTd
+LDLIBS += $(EXT_LIB)/dxtnd.lib $(EXT_LIB)/libpngd.lib $(EXT_LIB)/zlibd.lib LIBCMTD.LIB LIBCPMTD.LIB $(BOOST_LIB)/libboost_filesystem-vc80-mt-sgd.lib $(BOOST_LIB)/libboost_system-vc80-mt-sgd.lib $(BOOST_LIB)/libboost_thread-vc80-mt-sgd.lib
+OPTFLAGS ?= -Od
+else
+CFLAGS += -DNDEBUG -GL -MT
+LDFLAGS += -ltcg:STATUS
+LDLIBS += $(EXT_LIB)/dxtn.lib $(EXT_LIB)/libpng.lib $(EXT_LIB)/zlib.lib LIBCMT.LIB LIBCPMT.LIB $(BOOST_LIB)/libboost_filesystem-vc80-mt-s.lib $(BOOST_LIB)/libboost_system-vc80-mt-s.lib $(BOOST_LIB)/libboost_thread-vc80-mt-s.lib
+OPTFLAGS ?= -O2
+endif
+
+CFLAGS += -I. -I$(EXT_INC) -I$(BOOST_INC)
+CFLAGS += $(CDEFS) $(OPTFLAGS)
+
+SOURCES = \
+	TxFilterExport.cpp \
+	TxFilter.cpp \
+	TxCache.cpp \
+	TxTexCache.cpp \
+	TxHiResCache.cpp \
+	TxQuantize.cpp \
+	TxUtil.cpp \
+	TextureFilters.cpp \
+	TextureFilters_2xsai.cpp \
+	TextureFilters_hq2x.cpp \
+	TextureFilters_hq4x.cpp \
+	TxImage.cpp \
+	TxReSample.cpp \
+	TxDbg.cpp
+
+
+OBJECTS = $(SOURCES:.cpp=.obj)
+
+.cpp.obj:
+	$(CC) -Fo$@ $(CFLAGS) -c $<
+# Compile the version resource script.
+.rc.res:
+	$(RC) -Fo$@ $(CDEFS) $<
+# Default goal: the DLL.
+all: $(DLLNAME)
+# Link the objects, the libraries in LDLIBS and the compiled resource.
+$(DLLNAME): $(OBJECTS) $(RESNAME)
+	$(LD) -out:$@ $(LDFLAGS) $(OBJECTS) $(LDLIBS) $(RESNAME)
+# NOTE(review): each object depends on ALL sources, so any edit rebuilds everything.
+$(OBJECTS): $(SOURCES) bldno.h
+# bldno.h is the captured standard output of the bldno helper program.
+bldno.h: bldno.exe
+	./$< > $@
+# Helper program that prints the contents of bldno.h.
+bldno.exe: bldno.cpp
+	$(CC) -o $@ $(CFLAGS) $<
+# The leading '-' lets make continue when the removal fails.
+clean:
+	-$(RM) *.obj *.pdb $(RESNAME) bldno.h *.ilk
+# realclean also removes the DLL, its import library/export file and the helper.
+realclean: clean
+	-$(RM) $(DLLNAME) $(LIBNAME) $(EXPNAME) bldno.exe
diff --git a/GLideNHQ/README.txt b/GLideNHQ/README.txt
new file mode 100644
index 00000000..c903e573
--- /dev/null
+++ b/GLideNHQ/README.txt
@@ -0,0 +1,94 @@
+/*
+ * GlideHQ (Texture enhancer library for Glide64)
+ * Version:  1.5
+ *
+ * Copyright (C) 2007  Hiroshi Morii aka KoolSmoky   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+About:
+This is a realtime texture enhancer library with hi-resolution texture
+pack support for Glide64 (http://glide64.emuxhaven.net). Traditional and
+non-traditional techniques have been used to achieve speed and high image
+quality even on a 9 year old 3Dfx Voodoo2.
+
+Although the 3Dfx Glide3x texture format naming conventions are used, the
+library can be expanded for generic use.
+
+Supported:
+OS: 32bit Linux and MS Windows
+Enhancers: Hq4x, Hq2x, Hq2xS, Lq2x, Lq2xS, Super2xSai, x2
+Filters: Smooth (1,2,3,4), Sharp (1,2)
+Compressors: FXT1, S3TC
+Input formats:  GR_TEXFMT_ALPHA_8,
+                GR_TEXFMT_RGB_565,
+                GR_TEXFMT_ARGB_1555,
+                GR_TEXFMT_ARGB_4444,
+                GR_TEXFMT_ARGB_8888,
+                GR_TEXFMT_ALPHA_INTENSITY_44,
+                GR_TEXFMT_ALPHA_INTENSITY_88
+Output formats: Same as input unless compression or hires packs are used.
+Hires texture packs: Rice format (Jabo and GlideHQ format coming later)
+
+Acknowledgments:
+I hope you enjoy GlideHQ (texture enhancer library for Glide64). Greatest
+thanks to Gonetz for making this happen in his busy time. We've rushed
+everything to share the eye-candy with all of you N64 emulation fans. I
+would also like to thank a great friend of mine, Daniel Borca for providing
+the texture compression code, Maxim Stepin (hq2x 4x), and Derek Liauw Kie Fa
+(2xSaI) for the filtering engines, Rice for his N64 graphics plugin source
+code, and Mudlord for the hq2xS lq2xS code. GlideHQ also uses the boost C++
+libraries, zlib general purpose compression library, and the Portable Network
+Graphics library. Thanks to all the developers for making them available. And
+special thanks to the Glide64 beta testing crew. Without their feedback
+this library would not have seen daylight. Thank you all.
+
+The source code for GlideHQ is released in hopes that it will be improved.
+I know the coding is not up to par after so many late-night caffeine boosts.
+If you have suggestions or modifications, please feel free to post them on
+the Glide64 forum at emuxhaven.
+
+Porting the library to other platforms should not be so hard. The coding is
+done with cross platform compatibility in mind and will build with GCC and
+GNU make. Currently supported are 32bit Linux and MS Windows.
+
+If you are looking for driver updates for your 3Dfx Interactive Inc. gfx 
+card, grab them from the forums at http://www.3dfxzone.it/enboard/
+Unbelievable as it seems, drivers are still being updated 6 years
+after 3Dfx's demise.
+
+I know N64 rules, anyone up for PSX? :))
+
+-KoolSmoky
+
+References:
+[1] R.W. Floyd & L. Steinberg, An adaptive algorithm for spatial grey scale,
+    Proceedings of the Society of Information Display 17, pp75-77, 1976
+[2] Ken Turkowski, Filters for Common Resampling Tasks, Apple Computer 1990
+    http://www.worldserver.com/turk/computergraphics/ResamplingFilters.pdf
+[3] Don P. Mitchell and Arun N. Netravali, Reconstruction Filters in Computer
+    Graphics, SIGGRAPH '88, Proceedings of the 15th annual conference on
+    Computer graphics and interactive techniques, pp221-228, 1988
+[4] J. F. Kaiser and W. A. Reed, Data smoothing using low-pass digital
+    filters, Rev. Sci. instrum. 48 (11), pp1447-1457, 1977
+[5] Maxim Stepin, hq4x Magnification Filter, http://www.hiend3d.com/hq4x.html
+[6] Derek Liauw Kie Fa, 2xSaI, http://elektron.its.tudelft.nl/~dalikifa
+[7] Dirk Stevens, Eagle engine http://www.retrofx.com/rfxtech.html
+[8] 3DFX_texture_compression_FXT1 and EXT_texture_compression_s3tc extension
+    specs from the OpenGL Extension Registry. http://oss.sgi.com/projects/
+    ogl-sample/registry/
diff --git a/GLideNHQ/TextureFilters.cpp b/GLideNHQ/TextureFilters.cpp
new file mode 100644
index 00000000..e0e2d9b4
--- /dev/null
+++ b/GLideNHQ/TextureFilters.cpp
@@ -0,0 +1,715 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+#include <string.h>
+#include "TextureFilters.h"
+
+/************************************************************************/
+/* 2X filters                                                           */
+/************************************************************************/
+
+#define DWORD_MAKE(r, g, b, a)   ((uint32) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)))
+#define WORD_MAKE(r, g, b, a)   ((uint16) (((a) << 12) | ((r) << 8) | ((g) << 4) | (b)))
+
+// Basic 2x R8G8B8A8 filter with interpolation
+
+/*
+ * Texture2x_32: enlarge a 32bpp image to twice its width and twice its height.
+ *
+ *   srcPtr / srcPitch : source pixels and the byte distance between source rows
+ *   dstPtr / dstPitch : destination pixels and the byte distance between
+ *                       destination rows (2*height rows of 2*width pixels are written)
+ *   width / height    : source dimensions in pixels
+ *
+ * Every source pixel produces a 2x2 destination cell:
+ *   top-left     = the source pixel itself
+ *   top-right    = average of the pixel and its right neighbour
+ *   bottom-left  = average of the pixel and the neighbour below
+ *   bottom-right = average of all four pixels of the source 2x2 neighbourhood
+ * Each of the four 8-bit channels of the packed 32-bit pixel is averaged
+ * independently. At the right/bottom border the missing neighbour is replaced
+ * by the nearest existing pixel, which yields exactly the border values of the
+ * classic formulation (copy, or two-pixel average).
+ */
+void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  const uint32 cols = width;
+  const uint32 rows = height;
+
+  for (uint32 y = 0; y < rows; ++y)
+  {
+    const uint32 *rowCur  = (const uint32*)(srcPtr + y*srcPitch);
+    const uint32 *rowNext = (const uint32*)(srcPtr + (y+1)*srcPitch);
+    uint32 *outTop = (uint32*)(dstPtr + (y*2)*dstPitch);
+    uint32 *outBot = (uint32*)(dstPtr + (y*2+1)*dstPitch);
+    const bool hasBelow = (y+1 < rows);
+
+    for (uint32 x = 0; x < cols; ++x)
+    {
+      const bool hasRight = (x+1 < cols);
+
+      // Gather the 2x2 neighbourhood; rowNext / x+1 are only dereferenced
+      // when they lie inside the image.
+      const uint32 p1 = rowCur[x];
+      const uint32 p2 = hasRight ? rowCur[x+1] : p1;
+      const uint32 p3 = hasBelow ? rowNext[x] : p1;
+      const uint32 p4 = hasBelow ? (hasRight ? rowNext[x+1] : p3) : p2;
+
+      uint32 horiz = 0;
+      uint32 vert  = 0;
+      uint32 diag  = 0;
+
+      // Average one 8-bit channel at a time and put it back in place.
+      for (uint32 bit = 0; bit < 32; bit += 8)
+      {
+        const uint32 c1 = (p1 >> bit) & 0xFF;
+        const uint32 c2 = (p2 >> bit) & 0xFF;
+        const uint32 c3 = (p3 >> bit) & 0xFF;
+        const uint32 c4 = (p4 >> bit) & 0xFF;
+
+        horiz |= ((c1 + c2) / 2) << bit;
+        vert  |= ((c1 + c3) / 2) << bit;
+        diag  |= ((c1 + c2 + c3 + c4) / 4) << bit;
+      }
+
+      outTop[x*2]   = p1;     // Pixel 1
+      outTop[x*2+1] = horiz;  // Pixel 2
+      outBot[x*2]   = vert;   // Pixel 3
+      outBot[x*2+1] = diag;   // Pixel 4
+    }
+  }
+}
+
+#if !_16BPP_HACK
+// Basic 2x R4G4B4A4 filter with interpolation
+/*
+ * Texture2x_16: enlarge a 16bpp image (four 4-bit channels per pixel) to
+ * twice its width and twice its height.
+ *
+ *   srcPtr / srcPitch : source pixels and the byte distance between source rows
+ *   dstPtr / dstPitch : destination pixels and the byte distance between
+ *                       destination rows (2*height rows of 2*width pixels are written)
+ *   width / height    : source dimensions in pixels
+ *
+ * Every source pixel produces a 2x2 destination cell: the pixel itself, the
+ * average with its right neighbour, the average with the neighbour below and
+ * the average of the whole 2x2 source neighbourhood. Channels are averaged
+ * independently. At the right/bottom border the missing neighbour is replaced
+ * by the nearest existing pixel, which gives the same values as copying the
+ * pixel or averaging only the two pixels that exist.
+ *
+ * The loop counters are 32 bits wide: 16-bit counters compared against a
+ * 32-bit bound wrap around and never terminate once a dimension exceeds 65535.
+ */
+void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  const uint32 cols = width;
+  const uint32 rows = height;
+
+  for (uint32 y = 0; y < rows; ++y)
+  {
+    const uint16 *rowCur  = (const uint16*)(srcPtr + y*srcPitch);
+    const uint16 *rowNext = (const uint16*)(srcPtr + (y+1)*srcPitch);
+    uint16 *outTop = (uint16*)(dstPtr + (y*2)*dstPitch);
+    uint16 *outBot = (uint16*)(dstPtr + (y*2+1)*dstPitch);
+    const bool hasBelow = (y+1 < rows);
+
+    for (uint32 x = 0; x < cols; ++x)
+    {
+      const bool hasRight = (x+1 < cols);
+
+      // Gather the 2x2 neighbourhood; rowNext / x+1 are only dereferenced
+      // when they lie inside the image.
+      const uint32 p1 = rowCur[x];
+      const uint32 p2 = hasRight ? rowCur[x+1] : p1;
+      const uint32 p3 = hasBelow ? rowNext[x] : p1;
+      const uint32 p4 = hasBelow ? (hasRight ? rowNext[x+1] : p3) : p2;
+
+      uint32 horiz = 0;
+      uint32 vert  = 0;
+      uint32 diag  = 0;
+
+      // Average one 4-bit channel at a time and put it back in place.
+      for (uint32 bit = 0; bit < 16; bit += 4)
+      {
+        const uint32 c1 = (p1 >> bit) & 0xF;
+        const uint32 c2 = (p2 >> bit) & 0xF;
+        const uint32 c3 = (p3 >> bit) & 0xF;
+        const uint32 c4 = (p4 >> bit) & 0xF;
+
+        horiz |= ((c1 + c2) / 2) << bit;
+        vert  |= ((c1 + c3) / 2) << bit;
+        diag  |= ((c1 + c2 + c3 + c4) / 4) << bit;
+      }
+
+      outTop[x*2]   = (uint16)p1;     // Pixel 1
+      outTop[x*2+1] = (uint16)horiz;  // Pixel 2
+      outBot[x*2]   = (uint16)vert;   // Pixel 3
+      outBot[x*2+1] = (uint16)diag;   // Pixel 4
+    }
+  }
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * Sharp filters
+ * Hiroshi Morii <koolsmoky@users.sourceforge.net>
+ */
+/*
+ * SharpFilter_8888: sharpen a tightly packed 32bpp image (srcwidth pixels per
+ * row, no padding) from src into dest. dest must hold srcwidth*srcheight
+ * pixels.
+ *
+ *   filter : SHARP_FILTER_2 selects (12*center - ring) >> 2,
+ *            SHARP_FILTER_1 or any other value selects (16*center - ring) >> 3,
+ *            where ring is the sum of the eight surrounding samples.
+ *
+ * Each 8-bit channel is processed independently. A sample is only sharpened
+ * when 8*center exceeds the ring sum; otherwise it is copied. Results are
+ * clamped to 0xFF. The outermost rows and columns are copied unfiltered.
+ *
+ * Degenerate images are handled explicitly: nothing is done when either
+ * dimension is zero, and a single-row image is copied as is. Without these
+ * guards the unsigned "dimension - 1" loop bounds wrap around and the
+ * first-row/last-row copies touch a second row that does not exist.
+ */
+void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
+{
+  // NOTE: for now we get away with copying the boundaries
+  //       filter the boundaries if we face problems
+
+  const uint32 ringWeight = 1;   // weight applied to the sum of the 8 neighbours
+  const uint32 threshold  = 8;   // sharpen only if center*threshold > ring
+  uint32 centerWeight;
+  uint32 normShift;
+
+  switch (filter) {
+  case SHARP_FILTER_2:
+    centerWeight = 12;
+    normShift = 2;
+    break;
+  case SHARP_FILTER_1:
+  default:
+    centerWeight = 16;
+    normShift = 3;
+    break;
+  }
+
+  if (srcwidth == 0 || srcheight == 0)
+    return;
+  if (srcheight < 2) {
+    // a single row has no interior: copy it once and stop
+    memcpy(dest, src, (srcwidth << 2));
+    return;
+  }
+
+  // three-row sliding window
+  const uint32 *above = src;
+  const uint32 *row   = above + srcwidth;
+  const uint32 *below = row + srcwidth;
+  uint32 *out = dest;
+
+  // copy the first row
+  memcpy(out, above, (srcwidth << 2));
+  out += srcwidth;
+
+  // filter 2nd row to 1 row before the last
+  for (uint32 y = 1; y < srcheight - 1; y++) {
+    // copy the first pixel
+    out[0] = row[0];
+
+    // filter 2nd pixel to 1 pixel before last
+    for (uint32 x = 1; x < srcwidth - 1; x++) {
+      uint32 pixel = 0;
+      // channels are extracted with shifts so the result does not depend
+      // on the byte order of the host
+      for (uint32 bit = 0; bit < 32; bit += 8) {
+        const uint32 center = (row[x] >> bit) & 0xFF;
+        const uint32 ring =
+          ((above[x-1] >> bit) & 0xFF) + ((above[x] >> bit) & 0xFF) + ((above[x+1] >> bit) & 0xFF) +
+          ((row[x-1]   >> bit) & 0xFF) +                              ((row[x+1]   >> bit) & 0xFF) +
+          ((below[x-1] >> bit) & 0xFF) + ((below[x] >> bit) & 0xFF) + ((below[x+1] >> bit) & 0xFF);
+
+        uint32 value = center;
+        if ((center * threshold) > ring * ringWeight) {
+          // cannot go negative: centerWeight >= threshold
+          value = ((center * centerWeight) - ring * ringWeight) >> normShift;
+          if (value > 0xFF) value = 0xFF;
+        }
+        pixel |= value << bit;
+      }
+      out[x] = pixel;
+    }
+
+    // copy the ending pixel
+    out[srcwidth-1] = row[srcwidth-1];
+
+    // next row
+    above += srcwidth;
+    row   += srcwidth;
+    below += srcwidth;
+    out   += srcwidth;
+  }
+
+  // copy the last row
+  memcpy(out, row, (srcwidth << 2));
+}
+
+#if !_16BPP_HACK
+/*
+ * SharpFilter_4444: sharpen a tightly packed 16bpp image with four 4-bit
+ * channels per pixel (srcwidth pixels per row, no padding) from src into
+ * dest. dest must hold srcwidth*srcheight pixels.
+ *
+ *   filter : SHARP_FILTER_2 selects (12*center - ring) >> 2,
+ *            SHARP_FILTER_1 or any other value selects (16*center - ring) >> 3,
+ *            where ring is the sum of the eight surrounding samples.
+ *
+ * Each channel is processed independently. A sample is only sharpened when
+ * 8*center exceeds the ring sum; otherwise it is copied. Results are clamped
+ * to 0xF. The outermost rows and columns are copied unfiltered.
+ *
+ * Degenerate images are handled explicitly: nothing is done when either
+ * dimension is zero, and a single-row image is copied as is. Without these
+ * guards the unsigned "dimension - 1" loop bounds wrap around and the
+ * first-row/last-row copies touch a second row that does not exist.
+ */
+void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
+{
+  // NOTE: for now we get away with copying the boundaries
+  //       filter the boundaries if we face problems
+
+  const uint32 ringWeight = 1;   // weight applied to the sum of the 8 neighbours
+  const uint32 threshold  = 8;   // sharpen only if center*threshold > ring
+  uint32 centerWeight;
+  uint32 normShift;
+
+  switch (filter) {
+  case SHARP_FILTER_2:
+    centerWeight = 12;
+    normShift = 2;
+    break;
+  case SHARP_FILTER_1:
+  default:
+    centerWeight = 16;
+    normShift = 3;
+    break;
+  }
+
+  if (srcwidth == 0 || srcheight == 0)
+    return;
+  if (srcheight < 2) {
+    // a single row has no interior: copy it once and stop
+    memcpy(dest, src, (srcwidth << 1));
+    return;
+  }
+
+  // three-row sliding window
+  const uint16 *above = src;
+  const uint16 *row   = above + srcwidth;
+  const uint16 *below = row + srcwidth;
+  uint16 *out = dest;
+
+  // copy the first row
+  memcpy(out, above, (srcwidth << 1));
+  out += srcwidth;
+
+  // filter 2nd row to 1 row before the last
+  for (uint32 y = 1; y < srcheight - 1; y++) {
+    // copy the first pixel
+    out[0] = row[0];
+
+    // filter 2nd pixel to 1 pixel before last
+    for (uint32 x = 1; x < srcwidth - 1; x++) {
+      uint32 pixel = 0;
+      // read the entire 16bit pixel and then extract each 4-bit component
+      for (uint32 bit = 0; bit < 16; bit += 4) {
+        const uint32 center = (row[x] >> bit) & 0xF;
+        const uint32 ring =
+          ((above[x-1] >> bit) & 0xF) + ((above[x] >> bit) & 0xF) + ((above[x+1] >> bit) & 0xF) +
+          ((row[x-1]   >> bit) & 0xF) +                             ((row[x+1]   >> bit) & 0xF) +
+          ((below[x-1] >> bit) & 0xF) + ((below[x] >> bit) & 0xF) + ((below[x+1] >> bit) & 0xF);
+
+        uint32 value = center;
+        if ((center * threshold) > ring * ringWeight) {
+          // cannot go negative: centerWeight >= threshold
+          value = ((center * centerWeight) - ring * ringWeight) >> normShift;
+          if (value > 0xF) value = 0xF;
+        }
+        pixel |= value << bit;
+      }
+      out[x] = (uint16)pixel;
+    }
+
+    // copy the ending pixel
+    out[srcwidth-1] = row[srcwidth-1];
+
+    // next row
+    above += srcwidth;
+    row   += srcwidth;
+    below += srcwidth;
+    out   += srcwidth;
+  }
+
+  // copy the last row
+  memcpy(out, row, (srcwidth << 1));
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * Smooth filters
+ * Hiroshi Morii <koolsmoky@users.sourceforge.net>
+ */
+/*
+ * SmoothFilter_8888: blur a tightly packed 32bpp image (srcwidth pixels per
+ * row, no padding) from src into dest. dest must hold srcwidth*srcheight
+ * pixels. Each 8-bit channel is processed independently and clamped to 0xFF.
+ *
+ *   filter : SMOOTH_FILTER_4  3x3 kernel, corners 1, edges 2, center 4, >> 4
+ *            SMOOTH_FILTER_3  3x3 kernel, corners 1, edges 1, center 8, >> 4
+ *            SMOOTH_FILTER_2  vertical kernel, up/down 1, center 2, >> 2
+ *            SMOOTH_FILTER_1  (and any other value)
+ *                             vertical kernel, up/down 1, center 6, >> 3
+ *
+ * The 3x3 kernels copy the first/last row and the first/last pixel of every
+ * row. The vertical kernels copy the first/last row, filter every pixel of
+ * the odd-numbered interior rows and copy the even-numbered ones.
+ *
+ * Degenerate images are handled explicitly: nothing is done when either
+ * dimension is zero, and a single-row image is copied as is. Without these
+ * guards the unsigned "dimension - 1" loop bounds wrap around and the
+ * first-row/last-row copies touch a second row that does not exist.
+ */
+void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter)
+{
+  // NOTE: for now we get away with copying the boundaries
+  //       filter the boundaries if we face problems
+
+  uint32 cornerWeight, edgeWeight, centerWeight, normShift;
+  bool fullKernel;   // true: 3x3 kernel, false: vertical 3-tap kernel
+
+  switch (filter) {
+  case SMOOTH_FILTER_4:
+    cornerWeight = 1; edgeWeight = 2; centerWeight = 4; normShift = 4;
+    fullKernel = true;
+    break;
+  case SMOOTH_FILTER_3:
+    cornerWeight = 1; edgeWeight = 1; centerWeight = 8; normShift = 4;
+    fullKernel = true;
+    break;
+  case SMOOTH_FILTER_2:
+    cornerWeight = 1; edgeWeight = 1; centerWeight = 2; normShift = 2;
+    fullKernel = false;
+    break;
+  case SMOOTH_FILTER_1:
+  default:
+    cornerWeight = 1; edgeWeight = 1; centerWeight = 6; normShift = 3;
+    fullKernel = false;
+    break;
+  }
+
+  if (srcwidth == 0 || srcheight == 0)
+    return;
+  if (srcheight < 2) {
+    // a single row has no interior: copy it once and stop
+    memcpy(dest, src, (srcwidth << 2));
+    return;
+  }
+
+  // three-row sliding window
+  const uint32 *above = src;
+  const uint32 *row   = above + srcwidth;
+  const uint32 *below = row + srcwidth;
+  uint32 *out = dest;
+
+  // copy the first row
+  memcpy(out, above, (srcwidth << 2));
+  out += srcwidth;
+
+  // filter 2nd row to 1 row before the last
+  for (uint32 y = 1; y < srcheight - 1; y++) {
+    if (fullKernel) {
+      // copy the first pixel
+      out[0] = row[0];
+      // filter 2nd pixel to 1 pixel before last
+      for (uint32 x = 1; x < srcwidth - 1; x++) {
+        uint32 pixel = 0;
+        // channels are extracted with shifts so the result does not depend
+        // on the byte order of the host
+        for (uint32 bit = 0; bit < 32; bit += 8) {
+          const uint32 corners =
+            ((above[x-1] >> bit) & 0xFF) + ((above[x+1] >> bit) & 0xFF) +
+            ((below[x-1] >> bit) & 0xFF) + ((below[x+1] >> bit) & 0xFF);
+          const uint32 edges =
+            ((above[x] >> bit) & 0xFF) + ((row[x-1] >> bit) & 0xFF) +
+            ((row[x+1] >> bit) & 0xFF) + ((below[x] >> bit) & 0xFF);
+          const uint32 center = (row[x] >> bit) & 0xFF;
+
+          /* the component value must not overflow 0xFF */
+          uint32 value = (corners * cornerWeight + edges * edgeWeight + center * centerWeight) >> normShift;
+          if (value > 0xFF) value = 0xFF;
+          pixel |= value << bit;
+        }
+        out[x] = pixel;
+      }
+      // copy the ending pixel
+      out[srcwidth-1] = row[srcwidth-1];
+    } else if (y & 1) {
+      // odd rows: filter 1st pixel to the last
+      for (uint32 x = 0; x < srcwidth; x++) {
+        uint32 pixel = 0;
+        for (uint32 bit = 0; bit < 32; bit += 8) {
+          const uint32 upDown = ((above[x] >> bit) & 0xFF) + ((below[x] >> bit) & 0xFF);
+          const uint32 center = (row[x] >> bit) & 0xFF;
+
+          /* the component value must not overflow 0xFF */
+          uint32 value = (upDown * edgeWeight + center * centerWeight) >> normShift;
+          if (value > 0xFF) value = 0xFF;
+          pixel |= value << bit;
+        }
+        out[x] = pixel;
+      }
+    } else {
+      // even rows are passed through untouched
+      memcpy(out, row, (srcwidth << 2));
+    }
+
+    // next row
+    above += srcwidth;
+    row   += srcwidth;
+    below += srcwidth;
+    out   += srcwidth;
+  }
+
+  // copy the last row
+  memcpy(out, row, (srcwidth << 2));
+}
+
+#if !_16BPP_HACK
+/*
+ * SmoothFilter_4444: blur a tightly packed 16bpp image with four 4-bit
+ * channels per pixel (srcwidth pixels per row, no padding) from src into
+ * dest. dest must hold srcwidth*srcheight pixels. Each channel is processed
+ * independently and clamped to 0xF.
+ *
+ *   filter : SMOOTH_FILTER_4  3x3 kernel, corners 1, edges 2, center 4, >> 4
+ *            SMOOTH_FILTER_3  3x3 kernel, corners 1, edges 1, center 8, >> 4
+ *            SMOOTH_FILTER_2  vertical kernel, up/down 1, center 2, >> 2
+ *            SMOOTH_FILTER_1  (and any other value)
+ *                             vertical kernel, up/down 1, center 6, >> 3
+ *
+ * The 3x3 kernels copy the first/last row and the first/last pixel of every
+ * row. The vertical kernels copy the first/last row, filter every pixel of
+ * the odd-numbered interior rows and copy the even-numbered ones.
+ *
+ * Degenerate images are handled explicitly: nothing is done when either
+ * dimension is zero, and a single-row image is copied as is. Without these
+ * guards the unsigned "dimension - 1" loop bounds wrap around and the
+ * first-row/last-row copies touch a second row that does not exist.
+ */
+void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter)
+{
+  // NOTE: for now we get away with copying the boundaries
+  //       filter the boundaries if we face problems
+
+  uint32 cornerWeight, edgeWeight, centerWeight, normShift;
+  bool fullKernel;   // true: 3x3 kernel, false: vertical 3-tap kernel
+
+  switch (filter) {
+  case SMOOTH_FILTER_4:
+    cornerWeight = 1; edgeWeight = 2; centerWeight = 4; normShift = 4;
+    fullKernel = true;
+    break;
+  case SMOOTH_FILTER_3:
+    cornerWeight = 1; edgeWeight = 1; centerWeight = 8; normShift = 4;
+    fullKernel = true;
+    break;
+  case SMOOTH_FILTER_2:
+    cornerWeight = 1; edgeWeight = 1; centerWeight = 2; normShift = 2;
+    fullKernel = false;
+    break;
+  case SMOOTH_FILTER_1:
+  default:
+    cornerWeight = 1; edgeWeight = 1; centerWeight = 6; normShift = 3;
+    fullKernel = false;
+    break;
+  }
+
+  if (srcwidth == 0 || srcheight == 0)
+    return;
+  if (srcheight < 2) {
+    // a single row has no interior: copy it once and stop
+    memcpy(dest, src, (srcwidth << 1));
+    return;
+  }
+
+  // three-row sliding window
+  const uint16 *above = src;
+  const uint16 *row   = above + srcwidth;
+  const uint16 *below = row + srcwidth;
+  uint16 *out = dest;
+
+  // copy the first row
+  memcpy(out, above, (srcwidth << 1));
+  out += srcwidth;
+
+  // filter 2nd row to 1 row before the last
+  for (uint32 y = 1; y < srcheight - 1; y++) {
+    if (fullKernel) {
+      // copy the first pixel
+      out[0] = row[0];
+      // filter 2nd pixel to 1 pixel before last
+      for (uint32 x = 1; x < srcwidth - 1; x++) {
+        uint32 pixel = 0;
+        /* Read the entire 16bit pixel and then extract the 4-bit components. */
+        for (uint32 bit = 0; bit < 16; bit += 4) {
+          const uint32 corners =
+            ((above[x-1] >> bit) & 0xF) + ((above[x+1] >> bit) & 0xF) +
+            ((below[x-1] >> bit) & 0xF) + ((below[x+1] >> bit) & 0xF);
+          const uint32 edges =
+            ((above[x] >> bit) & 0xF) + ((row[x-1] >> bit) & 0xF) +
+            ((row[x+1] >> bit) & 0xF) + ((below[x] >> bit) & 0xF);
+          const uint32 center = (row[x] >> bit) & 0xF;
+
+          /* the component value must not overflow 0xF */
+          uint32 value = (corners * cornerWeight + edges * edgeWeight + center * centerWeight) >> normShift;
+          if (value > 0xF) value = 0xF;
+          pixel |= value << bit;
+        }
+        out[x] = (uint16)pixel;
+      }
+      // copy the ending pixel
+      out[srcwidth-1] = row[srcwidth-1];
+    } else if (y & 1) {
+      // odd rows: filter 1st pixel to the last
+      for (uint32 x = 0; x < srcwidth; x++) {
+        uint32 pixel = 0;
+        /* Read the entire 16bit pixel and then extract the 4-bit components. */
+        for (uint32 bit = 0; bit < 16; bit += 4) {
+          const uint32 upDown = ((above[x] >> bit) & 0xF) + ((below[x] >> bit) & 0xF);
+          const uint32 center = (row[x] >> bit) & 0xF;
+
+          /* the component value must not overflow 0xF */
+          uint32 value = (upDown * edgeWeight + center * centerWeight) >> normShift;
+          if (value > 0xF) value = 0xF;
+          pixel |= value << bit;
+        }
+        out[x] = (uint16)pixel;
+      }
+    } else {
+      // even rows are passed through untouched
+      memcpy(out, row, (srcwidth << 1));
+    }
+
+    // next row
+    above += srcwidth;
+    row   += srcwidth;
+    below += srcwidth;
+    out   += srcwidth;
+  }
+
+  // copy the last row
+  memcpy(out, row, (srcwidth << 1));
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * filter_8888: dispatch a 32bpp image to the enhancer or filter selected by
+ * the bits of `filter`.
+ *
+ * The enhancement bits (filter & ENHANCEMENT_MASK) are examined first; when
+ * they name a known enhancer it is run and nothing else happens. Otherwise
+ * the smooth/sharp bits are examined and the matching filter is run. When
+ * neither group matches, dest is left untouched.
+ *
+ * The image is treated as tightly packed: the source pitch is srcwidth*4
+ * bytes and the 2x enhancers are given a destination pitch of srcwidth*8.
+ */
+void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter) {
+  uint8 *const in  = (uint8*)src;
+  uint8 *const out = (uint8*)dest;
+  const uint32 inPitch  = (srcwidth << 2);
+  const uint32 outPitch = (srcwidth << 3);
+
+  const uint32 enhancement = filter & ENHANCEMENT_MASK;
+
+  if (enhancement == HQ4X_ENHANCEMENT) {
+    hq4x_8888(in, out, srcwidth, srcheight, srcwidth, (srcwidth << 4));
+  } else if (enhancement == HQ2X_ENHANCEMENT) {
+    hq2x_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancement == HQ2XS_ENHANCEMENT) {
+    hq2xS_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancement == LQ2X_ENHANCEMENT) {
+    lq2x_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancement == LQ2XS_ENHANCEMENT) {
+    lq2xS_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else if (enhancement == X2SAI_ENHANCEMENT) {
+    Super2xSaI_8888(src, dest, srcwidth, srcheight, srcwidth);
+  } else if (enhancement == X2_ENHANCEMENT) {
+    Texture2x_32(in, inPitch, out, outPitch, srcwidth, srcheight);
+  } else {
+    // no enhancer requested: fall back to the smooth/sharp selection
+    const uint32 post = filter & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK);
+
+    if (post == SMOOTH_FILTER_1 || post == SMOOTH_FILTER_2 ||
+        post == SMOOTH_FILTER_3 || post == SMOOTH_FILTER_4) {
+      SmoothFilter_8888(src, srcwidth, srcheight, dest, (filter & SMOOTH_FILTER_MASK));
+    } else if (post == SHARP_FILTER_1 || post == SHARP_FILTER_2) {
+      SharpFilter_8888(src, srcwidth, srcheight, dest, (filter & SHARP_FILTER_MASK));
+    }
+  }
+}
diff --git a/GLideNHQ/TextureFilters.h b/GLideNHQ/TextureFilters.h
new file mode 100644
index 00000000..7830eac5
--- /dev/null
+++ b/GLideNHQ/TextureFilters.h
@@ -0,0 +1,81 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#ifndef __TEXTUREFILTERS_H__
+#define __TEXTUREFILTERS_H__
+
+/* 16bpp filters are somewhat buggy and output image is not clean.
+ * Since there's not much time, we'll just convert them to ARGB8888
+ * and use 32bpp filters until fixed.
+ * (1:enable hack, 0:disable hack) */
+#define _16BPP_HACK 1
+
+#include "TxInternal.h"
+
+/* enhancers */
+/* 32bpp hq4x. filter_8888() passes Xres = SrcPPL = source width in pixels,
+ * Yres = source height and BpL = source width << 4. */
+void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+
+/* 32bpp hq2x / hq2xS. filter_8888() passes srcPitch = width << 2 and
+ * dstPitch = width << 3. */
+void hq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void hq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+/* 32bpp lq2x / lq2xS; same calling convention as the hq2x pair. */
+void lq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void lq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+/* 32bpp Super2xSaI (TextureFilters_2xsai.cpp). filter_8888() passes the
+ * source width as pitch. */
+void Super2xSaI_8888(uint32 *srcPtr, uint32 *destPtr, uint32 width, uint32 height, uint32 pitch);
+
+/* Basic 32bpp 2x enlargement with interpolation (TextureFilters.cpp).
+ * Pitches are in bytes, width/height in source pixels. */
+void Texture2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+/* filters */
+/* Same-size 32bpp sharpening; filter is SHARP_FILTER_1 or SHARP_FILTER_2. */
+void SharpFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter);
+
+/* Same-size 32bpp smoothing; filter is one of SMOOTH_FILTER_1..SMOOTH_FILTER_4. */
+void SmoothFilter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter);
+
+/* helper */
+/* Runs the enhancer or filter selected by the bits of filter on a tightly
+ * packed 32bpp image. */
+void filter_8888(uint32 *src, uint32 srcwidth, uint32 srcheight, uint32 *dest, uint32 filter);
+
+/* 16bpp (and 8bpp) variants; only declared when the _16BPP_HACK above is
+ * disabled. */
+#if !_16BPP_HACK
+void hq4x_init(void);
+void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+void hq4x_565 (unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL);
+
+void hq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void hq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+void lq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+void lq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+void Super2xSaI_4444(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch);
+void Super2xSaI_1555(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch);
+void Super2xSaI_565 (uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch);
+void Super2xSaI_8   (uint8  *srcPtr, uint8  *destPtr, uint32 width, uint32 height, uint32 pitch);
+
+/* Basic 2x enlargement for pixels with four 4-bit channels. */
+void Texture2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+/* Sharpening for pixels with four 4-bit channels. */
+void SharpFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter);
+
+/* Smoothing for pixels with four 4-bit channels. */
+void SmoothFilter_4444(uint16 *src, uint32 srcwidth, uint32 srcheight, uint16 *dest, uint32 filter);
+#endif
+
+#endif /* __TEXTUREFILTERS_H__ */
diff --git a/GLideNHQ/TextureFilters_2xsai.cpp b/GLideNHQ/TextureFilters_2xsai.cpp
new file mode 100644
index 00000000..38226df2
--- /dev/null
+++ b/GLideNHQ/TextureFilters_2xsai.cpp
@@ -0,0 +1,155 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Based on Derek Liauw Kie Fa and Rice1964 Super2xSaI code */
+
+#include "TextureFilters.h"
+
+/* Evaluates to -1, 0 or +1: (1 if A differs from C or D) minus
+ * (1 if B differs from C or D). */
+#define GET_RESULT(A, B, C, D) ((A != C || A != D) - (B != C || B != D))
+
+/*
+ * Super2xSaI for 32bpp pixels (four 8-bit channels).
+ * The function only supplies the pixel-format specific pieces (interpolation
+ * macros and correctly typed locals); the scaling loop itself is textually
+ * included from TextureFilters_2xsai.h and operates on the parameters and
+ * locals declared here.
+ */
+void Super2xSaI_8888(uint32 *srcPtr, uint32 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{
+/* Per-channel average of two packed pixels: each channel is halved with its
+ * low bit masked off, and the low bit is added back when both inputs have it.
+ * NOTE(review): the expansions are not wrapped in outer parentheses, so they
+ * are only safe as a complete right-hand side. */
+#define SAI_INTERPOLATE_8888(A, B) ((A & 0xFEFEFEFE) >> 1) + ((B & 0xFEFEFEFE) >> 1) + (A & B & 0x01010101)
+/* Per-channel average of four packed pixels: quarter of the upper six bits of
+ * each channel plus the quarter of the summed low two bits. */
+#define SAI_Q_INTERPOLATE_8888(A, B, C, D) ((A & 0xFCFCFCFC) >> 2) + ((B & 0xFCFCFCFC) >> 2) + ((C & 0xFCFCFCFC) >> 2) + ((D & 0xFCFCFCFC) >> 2) \
+  + ((((A & 0x03030303) + (B & 0x03030303) + (C & 0x03030303) + (D & 0x03030303)) >> 2) & 0x03030303)
+
+/* Format-neutral names, presumably the ones the included loop body uses. */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_8888
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_8888
+
+  /* output is twice the source size in both directions */
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+  
+  /* working pixels of the sampling window; typed to match the pixel format */
+  uint32 color4, color5, color6;
+  uint32 color1, color2, color3;
+  uint32 colorA0, colorA1, colorA2, colorA3;
+  uint32 colorB0, colorB1, colorB2, colorB3;
+  uint32 colorS1, colorS2;
+  uint32 product1a, product1b, product2a, product2b;
+
+/* shared scaling loop */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+#if !_16BPP_HACK
+/*
+ * Super2xSaI for 16bpp pixels with four 4-bit channels.
+ * Supplies the format specific interpolation macros and locals; the scaling
+ * loop is textually included from TextureFilters_2xsai.h.
+ */
+void Super2xSaI_4444(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{
+/* Per-channel average of two pixels: halve each 4-bit channel with its low
+ * bit masked off, then add the low bit back when both inputs have it. */
+#define SAI_INTERPOLATE_4444(A, B) ((A & 0xEEEE) >> 1) + ((B & 0xEEEE) >> 1) + (A & B & 0x1111)
+/* Per-channel average of four pixels: quarter of the upper two bits of each
+ * channel plus the quarter of the summed low two bits. */
+#define SAI_Q_INTERPOLATE_4444(A, B, C, D) ((A & 0xCCCC) >> 2) + ((B & 0xCCCC) >> 2) + ((C & 0xCCCC) >> 2) + ((D & 0xCCCC) >> 2) \
+  + ((((A & 0x3333) + (B & 0x3333) + (C & 0x3333) + (D & 0x3333)) >> 2) & 0x3333)
+
+/* Format-neutral names, presumably the ones the included loop body uses. */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_4444
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_4444
+
+  /* output is twice the source size in both directions */
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  /* working pixels of the sampling window; typed to match the pixel format */
+  uint16 color4, color5, color6;
+  uint16 color1, color2, color3;
+  uint16 colorA0, colorA1, colorA2, colorA3;
+  uint16 colorB0, colorB1, colorB2, colorB3;
+  uint16 colorS1, colorS2;
+  uint16 product1a, product1b, product2a, product2b;
+
+/* shared scaling loop */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+/*
+ * Super2xSaI for 16bpp pixels laid out as one top bit plus three 5-bit
+ * channels. Supplies the format specific interpolation macros and locals; the
+ * scaling loop is textually included from TextureFilters_2xsai.h.
+ */
+void Super2xSaI_1555(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{
+/* Average of two pixels: the 5-bit channels are halved with their low bit
+ * masked off; the low bits and bit 15 are kept only when set in both inputs. */
+#define SAI_INTERPOLATE_1555(A, B) ((A & 0x7BDE) >> 1) + ((B & 0x7BDE) >> 1) + (A & B & 0x8421)
+/* Average of four pixels: quarter of the upper three bits of each 5-bit
+ * channel plus the quarter of the summed remaining bits (mask 0x8C63, which
+ * also covers bit 15). */
+#define SAI_Q_INTERPOLATE_1555(A, B, C, D) ((A & 0x739C) >> 2) + ((B & 0x739C) >> 2) + ((C & 0x739C) >> 2) + ((D & 0x739C) >> 2) \
+  + ((((A & 0x8C63) + (B & 0x8C63) + (C & 0x8C63) + (D & 0x8C63)) >> 2) & 0x8C63)
+
+/* Format-neutral names, presumably the ones the included loop body uses. */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_1555
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_1555
+
+  /* output is twice the source size in both directions */
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  /* working pixels of the sampling window; typed to match the pixel format */
+  uint16 color4, color5, color6;
+  uint16 color1, color2, color3;
+  uint16 colorA0, colorA1, colorA2, colorA3;
+  uint16 colorB0, colorB1, colorB2, colorB3;
+  uint16 colorS1, colorS2;
+  uint16 product1a, product1b, product2a, product2b;
+
+/* shared scaling loop */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+/*
+ * Super2xSaI for 16bpp pixels with 5-, 6- and 5-bit channels.
+ * Supplies the format specific interpolation macros and locals; the scaling
+ * loop is textually included from TextureFilters_2xsai.h.
+ */
+void Super2xSaI_565(uint16 *srcPtr, uint16 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{
+/* Per-channel average of two pixels: halve each channel with its low bit
+ * masked off, then add the low bit back when both inputs have it. */
+#define SAI_INTERPOLATE_565(A, B) ((A & 0xF7DE) >> 1) + ((B & 0xF7DE) >> 1) + (A & B & 0x0821)
+/* Per-channel average of four pixels: quarter of each channel with its low
+ * two bits masked off plus the quarter of the summed low two bits. */
+#define SAI_Q_INTERPOLATE_565(A, B, C, D) ((A & 0xE79C) >> 2) + ((B & 0xE79C) >> 2) + ((C & 0xE79C) >> 2) + ((D & 0xE79C) >> 2) \
+  + ((((A & 0x1863) + (B & 0x1863) + (C & 0x1863) + (D & 0x1863)) >> 2) & 0x1863)
+
+/* Format-neutral names, presumably the ones the included loop body uses. */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_565
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_565
+
+  /* output is twice the source size in both directions */
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  /* working pixels of the sampling window; typed to match the pixel format */
+  uint16 color4, color5, color6;
+  uint16 color1, color2, color3;
+  uint16 colorA0, colorA1, colorA2, colorA3;
+  uint16 colorB0, colorB1, colorB2, colorB3;
+  uint16 colorS1, colorS2;
+  uint16 product1a, product1b, product2a, product2b;
+
+/* shared scaling loop */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+
+/*
+ * Super2xSaI for 8-bit single-channel pixels.
+ * Supplies the format specific interpolation macros and locals; the scaling
+ * loop is textually included from TextureFilters_2xsai.h.
+ */
+void Super2xSaI_8(uint8 *srcPtr, uint8 *destPtr, uint32 width, uint32 height, uint32 pitch)
+{
+/* Average of two values: halve each with its low bit masked off, then add the
+ * low bit back when both inputs have it. */
+#define SAI_INTERPOLATE_8(A, B) ((A & 0xFE) >> 1) + ((B & 0xFE) >> 1) + (A & B & 0x01)
+/* Average of four values: quarter of the upper six bits plus the quarter of
+ * the summed low two bits. */
+#define SAI_Q_INTERPOLATE_8(A, B, C, D) ((A & 0xFC) >> 2) + ((B & 0xFC) >> 2) + ((C & 0xFC) >> 2) + ((D & 0xFC) >> 2) \
+  + ((((A & 0x03) + (B & 0x03) + (C & 0x03) + (D & 0x03)) >> 2) & 0x03)
+
+/* Format-neutral names, presumably the ones the included loop body uses. */
+#define SAI_INTERPOLATE SAI_INTERPOLATE_8
+#define SAI_Q_INTERPOLATE SAI_Q_INTERPOLATE_8
+
+  /* output is twice the source size in both directions */
+  uint32 destWidth = width << 1;
+  uint32 destHeight = height << 1;
+
+  /* working pixels of the sampling window; typed to match the pixel format */
+  uint8 color4, color5, color6;
+  uint8 color1, color2, color3;
+  uint8 colorA0, colorA1, colorA2, colorA3;
+  uint8 colorB0, colorB1, colorB2, colorB3;
+  uint8 colorS1, colorS2;
+  uint8 product1a, product1b, product2a, product2b;
+
+/* shared scaling loop */
+#include "TextureFilters_2xsai.h"
+
+#undef SAI_INTERPOLATE
+#undef SAI_Q_INTERPOLATE
+}
+#endif /* !_16BPP_HACK */
diff --git a/GLideNHQ/TextureFilters_2xsai.h b/GLideNHQ/TextureFilters_2xsai.h
new file mode 100644
index 00000000..f6696ae0
--- /dev/null
+++ b/GLideNHQ/TextureFilters_2xsai.h
@@ -0,0 +1,145 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Based on Derek Liauw Kie Fa and Rice1964 Super2xSaI code */
+
+  /*
+   * Shared Super2xSaI loop body. This fragment is #included inside each
+   * Super2xSaI_* function, which must provide srcPtr, destPtr, width, height
+   * and pitch, the colorXX / productXX locals, and the SAI_INTERPOLATE,
+   * SAI_Q_INTERPOLATE and GET_RESULT macros.
+   */
+  int row0, row1, row2, row3;
+  int col0, col1, col2, col3;
+
+  /* Row strides in pixels. The pointer bookkeeping at the end of each row
+   * advances srcPtr by `pitch` and destPtr by 4 * pitch (two output rows) in
+   * total, so neighbouring rows have to be addressed with the same strides;
+   * addressing them with `width` is only right when pitch == width. */
+  const int saiSrcStride = (int)pitch;
+  const uint32 saiDestStride = pitch << 1;
+
+  /* Same type as width/height so the counters cannot wrap on large images. */
+  uint32 x;
+  uint32 y;
+
+  for (y = 0; y < height; y++) {
+    /* The first and last rows have no full neighbourhood: all four row taps
+     * collapse onto the current row. */
+    if ((y > 0) && (y < height - 1)) {
+      row0 = -saiSrcStride;
+      row1 = 0;
+      row2 = saiSrcStride;
+      /* On the second-to-last row the fourth tap is clamped to the last row. */
+      row3 = (y == height - 2 ? saiSrcStride : saiSrcStride << 1);
+    } else {
+      row0 = 0;
+      row1 = 0;
+      row2 = 0;
+      row3 = 0;
+    }
+
+    for (x = 0; x < width; x++) {
+//--------------------------------------- B0 B1 B2 B3
+//                                         4  5  6 S2
+//                                         1  2  3 S1
+//                                        A0 A1 A2 A3
+      /* Same clamping as for rows: border columns collapse onto the current
+       * column, and the fourth tap is clamped on the second-to-last column. */
+      if ((x > 0) && (x < width - 1)) {
+        col0 = -1;
+        col1 = 0;
+        col2 = 1;
+        col3 = (x == width - 2 ? 1 : 2);
+      } else {
+        col0 = 0;
+        col1 = 0;
+        col2 = 0;
+        col3 = 0;
+      }
+
+      colorB0 = *(srcPtr + col0 + row0);
+      colorB1 = *(srcPtr + col1 + row0);
+      colorB2 = *(srcPtr + col2 + row0);
+      colorB3 = *(srcPtr + col3 + row0);
+
+      color4 = *(srcPtr + col0 + row1);
+      color5 = *(srcPtr + col1 + row1);
+      color6 = *(srcPtr + col2 + row1);
+      colorS2 = *(srcPtr + col3 + row1);
+
+      color1 = *(srcPtr + col0 + row2);
+      color2 = *(srcPtr + col1 + row2);
+      color3 = *(srcPtr + col2 + row2);
+      colorS1 = *(srcPtr + col3 + row2);
+
+      colorA0 = *(srcPtr + col0 + row3);
+      colorA1 = *(srcPtr + col1 + row3);
+      colorA2 = *(srcPtr + col2 + row3);
+      colorA3 = *(srcPtr + col3 + row3);
+
+//--------------------------------------
+      /* product1b / product2b: right-hand pixels of the upper / lower output row. */
+      if (color2 == color6 && color5 != color3)
+        product2b = product1b = color2;
+      else if (color5 == color3 && color2 != color6)
+        product2b = product1b = color5;
+      else if (color5 == color3 && color2 == color6) {
+        /* Both diagonals match: let the surrounding pixels vote. */
+        int r = 0;
+
+        r += GET_RESULT(color6, color5, color1, colorA1);
+        r += GET_RESULT(color6, color5, color4, colorB1);
+        r += GET_RESULT(color6, color5, colorA2, colorS1);
+        r += GET_RESULT(color6, color5, colorB2, colorS2);
+
+        if (r > 0)
+          product2b = product1b = color6;
+        else if (r < 0)
+          product2b = product1b = color5;
+        else
+          product2b = product1b = SAI_INTERPOLATE(color5, color6);
+
+      } else {
+
+        if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
+          product2b = SAI_Q_INTERPOLATE(color3, color3, color3, color2);
+        else if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
+          product2b = SAI_Q_INTERPOLATE(color2, color2, color2, color3);
+        else
+          product2b = SAI_INTERPOLATE(color2, color3);
+
+        if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
+          product1b = SAI_Q_INTERPOLATE(color6, color6, color6, color5);
+        else if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
+          product1b = SAI_Q_INTERPOLATE(color6, color5, color5, color5);
+        else
+          product1b = SAI_INTERPOLATE(color5, color6);
+      }
+
+      /* product2a: left-hand pixel of the lower output row. */
+      if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
+        product2a = SAI_INTERPOLATE(color2, color5);
+      else if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
+        product2a = SAI_INTERPOLATE(color2, color5);
+      else
+        product2a = color2;
+
+      /* product1a: left-hand pixel of the upper output row. */
+      if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
+        product1a = SAI_INTERPOLATE(color2, color5);
+      else if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
+        product1a = SAI_INTERPOLATE(color2, color5);
+      else
+        product1a = color5;
+
+
+      destPtr[0] = product1a;
+      destPtr[1] = product1b;
+      destPtr[saiDestStride] = product2a;
+      destPtr[saiDestStride + 1] = product2b;
+
+      srcPtr++;
+      destPtr += 2;
+    }
+    /* Skip the source row padding; on the destination side skip the padding
+     * of the row just written plus the whole second output row. */
+    srcPtr += (pitch-width);
+    destPtr += (((pitch-width)<<1)+(pitch<<1));
+  }
diff --git a/GLideNHQ/TextureFilters_hq2x.cpp b/GLideNHQ/TextureFilters_hq2x.cpp
new file mode 100644
index 00000000..33cb9953
--- /dev/null
+++ b/GLideNHQ/TextureFilters_hq2x.cpp
@@ -0,0 +1,1510 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+/* 2007 Mudlord - Added hq2xS lq2xS filters */
+
+#include "TextureFilters.h"
+
+/************************************************************************/
+/* hq2x filters                                                         */
+/************************************************************************/
+
+/***************************************************************************/
+/* Basic types */
+
+/***************************************************************************/
+/* interpolation */
+
+//static unsigned interp_bits_per_pixel;
+
+#if !_16BPP_HACK
+/* A 16-bit pixel is handled as four 4-bit fields. Fields 1 and 3 (mask
+ * 0x0F0F) and fields 2 and 4 (mask 0xF0F0, shifted down by 4) are blended as
+ * two separate words, so each field has spare bits above it to absorb the
+ * weighted sum before it is divided and masked again. */
+#define INTERP_16_MASK_1_3(v) ((v)&0x0F0F)
+#define INTERP_16_MASK_SHIFT_2_4(v) (((v)&0xF0F0)>>4)
+#define INTERP_16_MASK_SHIFTBACK_2_4(v) ((INTERP_16_MASK_1_3(v))<<4)
+
+/* Per-field weighted blend (p1*w1 + p2*w2 + p3*w3) / div of 16-bit pixels.
+ * Two-pixel blends pass a zero pixel with zero weight as the third input. */
+static inline uint16 hq2x_blend_16(uint16 p1, int w1, uint16 p2, int w2, uint16 p3, int w3, int div)
+{
+  const int oddFields = (INTERP_16_MASK_1_3(p1)*w1 + INTERP_16_MASK_1_3(p2)*w2 + INTERP_16_MASK_1_3(p3)*w3) / div;
+  const int evenFields = (INTERP_16_MASK_SHIFT_2_4(p1)*w1 + INTERP_16_MASK_SHIFT_2_4(p2)*w2 + INTERP_16_MASK_SHIFT_2_4(p3)*w3) / div;
+
+  return (uint16)(INTERP_16_MASK_1_3(oddFields) | INTERP_16_MASK_SHIFTBACK_2_4(evenFields));
+}
+
+/* The suffix of each wrapper spells out its weights, e.g. _521 = (5*p1 + 2*p2 + 1*p3) / 8. */
+
+static uint16 hq2x_interp_16_521(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 5, p2, 2, p3, 1, 8);
+}
+
+static uint16 hq2x_interp_16_332(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 3, p2, 3, p3, 2, 8);
+}
+
+static uint16 hq2x_interp_16_611(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 6, p2, 1, p3, 1, 8);
+}
+
+static uint16 hq2x_interp_16_71(uint16 p1, uint16 p2)
+{
+  return hq2x_blend_16(p1, 7, p2, 1, 0, 0, 8);
+}
+
+static uint16 hq2x_interp_16_211(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 2, p2, 1, p3, 1, 4);
+}
+
+static uint16 hq2x_interp_16_772(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 7, p2, 7, p3, 2, 16);
+}
+
+static uint16 hq2x_interp_16_11(uint16 p1, uint16 p2)
+{
+  return hq2x_blend_16(p1, 1, p2, 1, 0, 0, 2);
+}
+
+static uint16 hq2x_interp_16_31(uint16 p1, uint16 p2)
+{
+  return hq2x_blend_16(p1, 3, p2, 1, 0, 0, 4);
+}
+
+static uint16 hq2x_interp_16_1411(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 14, p2, 1, p3, 1, 16);
+}
+
+static uint16 hq2x_interp_16_431(uint16 p1, uint16 p2, uint16 p3)
+{
+  return hq2x_blend_16(p1, 4, p2, 3, p3, 1, 8);
+}
+
+static uint16 hq2x_interp_16_53(uint16 p1, uint16 p2)
+{
+  return hq2x_blend_16(p1, 5, p2, 3, 0, 0, 8);
+}
+
+static uint16 hq2x_interp_16_151(uint16 p1, uint16 p2)
+{
+  return hq2x_blend_16(p1, 15, p2, 1, 0, 0, 16);
+}
+
+static uint16 hq2x_interp_16_97(uint16 p1, uint16 p2)
+{
+  return hq2x_blend_16(p1, 9, p2, 7, 0, 0, 16);
+}
+#endif /* !_16BPP_HACK */
+
+/* A 32-bit pixel is handled as four 8-bit fields. Fields 1 and 3 (mask
+ * 0x00FF00FF) and fields 2 and 4 (mask 0xFF00FF00, shifted down by 8) are
+ * blended as two separate words, so each field has spare bits above it to
+ * absorb the weighted sum before it is divided and masked again. */
+#define INTERP_32_MASK_1_3(v) ((v)&0x00FF00FF)
+#define INTERP_32_MASK_SHIFT_2_4(v) (((v)&0xFF00FF00)>>8)
+#define INTERP_32_MASK_SHIFTBACK_2_4(v) (((INTERP_32_MASK_1_3(v))<<8))
+
+/* Per-field weighted blend (p1*w1 + p2*w2 + p3*w3) / div of 32-bit pixels.
+ * Two-pixel blends pass a zero pixel with zero weight as the third input. */
+static inline uint32 hq2x_blend_32(uint32 p1, uint32 w1, uint32 p2, uint32 w2, uint32 p3, uint32 w3, uint32 div)
+{
+  const uint32 oddFields = (INTERP_32_MASK_1_3(p1)*w1 + INTERP_32_MASK_1_3(p2)*w2 + INTERP_32_MASK_1_3(p3)*w3) / div;
+  const uint32 evenFields = (INTERP_32_MASK_SHIFT_2_4(p1)*w1 + INTERP_32_MASK_SHIFT_2_4(p2)*w2 + INTERP_32_MASK_SHIFT_2_4(p3)*w3) / div;
+
+  return INTERP_32_MASK_1_3(oddFields) | INTERP_32_MASK_SHIFTBACK_2_4(evenFields);
+}
+
+/* The suffix of each wrapper spells out its weights, e.g. _521 = (5*p1 + 2*p2 + 1*p3) / 8. */
+
+static uint32 hq2x_interp_32_521(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 5, p2, 2, p3, 1, 8);
+}
+
+static uint32 hq2x_interp_32_332(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 3, p2, 3, p3, 2, 8);
+}
+
+static uint32 hq2x_interp_32_211(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 2, p2, 1, p3, 1, 4);
+}
+
+static uint32 hq2x_interp_32_611(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 6, p2, 1, p3, 1, 8);
+}
+
+static uint32 hq2x_interp_32_71(uint32 p1, uint32 p2)
+{
+  return hq2x_blend_32(p1, 7, p2, 1, 0, 0, 8);
+}
+
+static uint32 hq2x_interp_32_772(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 7, p2, 7, p3, 2, 16);
+}
+
+static uint32 hq2x_interp_32_11(uint32 p1, uint32 p2)
+{
+  return hq2x_blend_32(p1, 1, p2, 1, 0, 0, 2);
+}
+
+static uint32 hq2x_interp_32_31(uint32 p1, uint32 p2)
+{
+  return hq2x_blend_32(p1, 3, p2, 1, 0, 0, 4);
+}
+
+static uint32 hq2x_interp_32_1411(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 14, p2, 1, p3, 1, 16);
+}
+
+static uint32 hq2x_interp_32_431(uint32 p1, uint32 p2, uint32 p3)
+{
+  return hq2x_blend_32(p1, 4, p2, 3, p3, 1, 8);
+}
+
+static uint32 hq2x_interp_32_53(uint32 p1, uint32 p2)
+{
+  return hq2x_blend_32(p1, 5, p2, 3, 0, 0, 8);
+}
+
+static uint32 hq2x_interp_32_151(uint32 p1, uint32 p2)
+{
+  return hq2x_blend_32(p1, 15, p2, 1, 0, 0, 16);
+}
+
+static uint32 hq2x_interp_32_97(uint32 p1, uint32 p2)
+{
+  return hq2x_blend_32(p1, 9, p2, 7, 0, 0, 16);
+}
+
+/***************************************************************************/
+/* diff */
+
+/* Thresholds for the hq2x_interp_*_diff helpers: two pixels are reported as
+ * different once the magnitude of the summed channel difference (y), of the
+ * r - b difference (u) or of the 2*g - r - b difference (v) exceeds its limit. */
+#define INTERP_Y_LIMIT (0x30*4)
+#define INTERP_U_LIMIT (0x07*4)
+#define INTERP_V_LIMIT (0x06*8)
+
+#if !_16BPP_HACK
+/* Returns 1 when two 16-bit pixels differ by more than the INTERP_*_LIMIT
+ * thresholds, 0 otherwise. Only the three low 4-bit fields take part; the top
+ * four bits are ignored unless the pixels are bit-identical.
+ *
+ * NOTE(review): with 4-bit channels |y| cannot exceed 45, so the Y limit
+ * (0x30*4) can never trigger here; the limits look tuned for 8-bit channels.
+ * Confirm whether the channel differences were meant to be rescaled. */
+static int hq2x_interp_16_diff(uint16 p1, uint16 p2)
+{
+  if (p1 == p2)
+    return 0;
+
+  /* Signed per-channel differences, taken after each field is extracted. */
+  const int db = (int)(p1 & 0x000F) - (int)(p2 & 0x000F);
+  const int dg = (int)((p1 >> 4) & 0x000F) - (int)((p2 >> 4) & 0x000F);
+  const int dr = (int)((p1 >> 8) & 0x000F) - (int)((p2 >> 8) & 0x000F);
+
+  const int y = dr + dg + db;
+  const int u = dr - db;
+  const int v = 2*dg - dr - db;
+
+  const bool yDiffers = (y < -INTERP_Y_LIMIT) || (y > INTERP_Y_LIMIT);
+  const bool uDiffers = (u < -INTERP_U_LIMIT) || (u > INTERP_U_LIMIT);
+  const bool vDiffers = (v < -INTERP_V_LIMIT) || (v > INTERP_V_LIMIT);
+
+  return (yDiffers || uDiffers || vDiffers) ? 1 : 0;
+}
+#endif /* !_16BPP_HACK */
+
+/* Returns 1 when two 32-bit pixels differ by more than the INTERP_*_LIMIT
+ * thresholds, 0 otherwise. Only the three low bytes take part (the top byte
+ * is ignored), and pixels whose low three bytes agree in their upper five
+ * bits are treated as equal straight away. */
+static int hq2x_interp_32_diff(uint32 p1, uint32 p2)
+{
+  if ((p1 & 0xF8F8F8) == (p2 & 0xF8F8F8))
+    return 0;
+
+  /* Signed per-channel differences, taken after each byte is extracted. */
+  const int db = (int)(p1 & 0xFF) - (int)(p2 & 0xFF);
+  const int dg = (int)((p1 >> 8) & 0xFF) - (int)((p2 >> 8) & 0xFF);
+  const int dr = (int)((p1 >> 16) & 0xFF) - (int)((p2 >> 16) & 0xFF);
+
+  const int y = dr + dg + db;
+  const int u = dr - db;
+  const int v = 2*dg - dr - db;
+
+  const bool yDiffers = (y < -INTERP_Y_LIMIT) || (y > INTERP_Y_LIMIT);
+  const bool uDiffers = (u < -INTERP_U_LIMIT) || (u > INTERP_U_LIMIT);
+  const bool vDiffers = (v < -INTERP_V_LIMIT) || (v > INTERP_V_LIMIT);
+
+  return (yDiffers || uDiffers || vDiffers) ? 1 : 0;
+}
+
+/*static void interp_set(unsigned bits_per_pixel)
+{
+   interp_bits_per_pixel = bits_per_pixel;
+}*/
+
+#if !_16BPP_HACK
+/* hq2x for one row of 16-bit pixels: every source pixel of the src1 row
+ * becomes a 2x2 block written to dst0 (upper pair) and dst1 (lower pair).
+ *
+ * src0 / src1 / src2 - rows supplying c[0..2], c[3..5] and c[6..8] of the 3x3
+ *                      neighbourhood (presumably the rows above, at and below
+ *                      the output position - confirm against the caller)
+ * count              - number of pixels in the row
+ *
+ * An 8-bit mask records which neighbours differ from the centre c[4]; the
+ * cases included from TextureFilters_hq2x.h turn it into the output pixels. */
+static void hq2x_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  /* c[] index of the neighbour tested for each mask bit (bit k <-> neighbour[k]). */
+  static const unsigned char neighbour[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+
+  for (unsigned i = 0; i < count; ++i) {
+    /* At either end of the row the missing column repeats the centre column. */
+    const int left = (i > 0) ? -1 : 0;
+    const int right = (i + 1 < count) ? 1 : 0;
+
+    uint16 c[9];
+    c[0] = src0[left];
+    c[1] = src0[0];
+    c[2] = src0[right];
+    c[3] = src1[left];
+    c[4] = src1[0];
+    c[5] = src1[right];
+    c[6] = src2[left];
+    c[7] = src2[0];
+    c[8] = src2[right];
+
+    unsigned char mask = 0;
+    for (unsigned k = 0; k < 8; ++k) {
+      if (hq2x_interp_16_diff(c[neighbour[k]], c[4]))
+        mask |= 1 << k;
+    }
+
+/* Vocabulary consumed by the included case table. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR hq2x_interp_16_diff(c[1], c[5])
+#define HQ2X_MDR hq2x_interp_16_diff(c[5], c[7])
+#define HQ2X_MDL hq2x_interp_16_diff(c[7], c[3])
+#define HQ2X_MUL hq2x_interp_16_diff(c[3], c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* One source pixel consumed, two output pixels produced per row. */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/* hq2xS for one row of 16-bit pixels: like hq2x_16_def, but neighbours are
+ * flagged by brightness relative to the contrast of the 3x3 block, and the
+ * four diagonal tests (HQ2X_M*) are fixed to false.
+ *
+ * dst0 / dst1        - upper / lower output row; two pixels per source pixel
+ * src0 / src1 / src2 - rows supplying c[0..2], c[3..5] and c[6..8]; src1
+ *                      holds the centre pixel c[4]
+ * count              - number of pixels in the row */
+static void hq2xS_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  unsigned i;
+
+  for(i=0;i<count;++i) {
+    unsigned char mask;
+
+    uint16 c[9];
+
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* At either end of the row the missing column repeats the centre column. */
+    if (i>0) {
+      c[0] = src0[-1];
+      c[3] = src1[-1];
+      c[6] = src2[-1];
+    } else {
+      c[0] = c[1];
+      c[3] = c[4];
+      c[6] = c[7];
+    }
+
+    if (i<count-1) {
+      c[2] = src0[1];
+      c[5] = src1[1];
+      c[8] = src2[1];
+    } else {
+      c[2] = c[1];
+      c[5] = c[4];
+      c[8] = c[7];
+    }
+
+    mask = 0;
+
+    // hq2xS dynamic edge detection:
+    // simply comparing the center color against its surroundings will give bad results in many cases,
+    // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
+    int brightArray[9];
+    int maxBright = 0, minBright = 999999;
+    for(int j = 0 ; j < 9 ; j++) {
+      // Decode the pixel with the same 4-bit channel layout that
+      // hq2x_interp_16_diff and the hq2x_interp_16_* blends use, widening each
+      // channel to the 8-bit range so the "diffBright > 7" cut-off below sits
+      // on the same scale as in hq2xS_32_def.
+      // NOTE(review): this used to branch on interp_bits_per_pixel, which is
+      // not declared anywhere (its declaration is commented out), and decoded
+      // 5-6-5 / 5-5-5 pixels - confirm 4-4-4-4 is the intended format.
+      const int b = (int)(c[j] & 0x000F) << 4;
+      const int g = (int)(c[j] & 0x00F0);
+      const int r = (int)(c[j] & 0x0F00) >> 4;
+      const int bright = r+r+r + g+g+g + b+b;
+      if(bright > maxBright) maxBright = bright;
+      if(bright < minBright) minBright = bright;
+
+      brightArray[j] = bright;
+    }
+    // A neighbour counts as different when it is further from the centre than
+    // 7/16 of the block's brightness range; near-flat blocks leave mask at 0.
+    int diffBright = ((maxBright - minBright) * 7) >> 4;
+    if(diffBright > 7) {
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+
+      const int centerBright = brightArray[4];
+      if(ABS(brightArray[0] - centerBright) > diffBright)
+        mask |= 1 << 0;
+      if(ABS(brightArray[1] - centerBright) > diffBright)
+        mask |= 1 << 1;
+      if(ABS(brightArray[2] - centerBright) > diffBright)
+        mask |= 1 << 2;
+      if(ABS(brightArray[3] - centerBright) > diffBright)
+        mask |= 1 << 3;
+      if(ABS(brightArray[5] - centerBright) > diffBright)
+        mask |= 1 << 4;
+      if(ABS(brightArray[6] - centerBright) > diffBright)
+        mask |= 1 << 5;
+      if(ABS(brightArray[7] - centerBright) > diffBright)
+        mask |= 1 << 6;
+      if(ABS(brightArray[8] - centerBright) > diffBright)
+        mask |= 1 << 7;
+    }
+
+/* Vocabulary consumed by the included case table. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    src0 += 1;
+    src1 += 1;
+    src2 += 1;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+#endif /* !_16BPP_HACK */
+
+/* hq2x for one row of 32-bit pixels: every source pixel of the src1 row
+ * becomes a 2x2 block written to dst0 (upper pair) and dst1 (lower pair).
+ *
+ * src0 / src1 / src2 - rows supplying c[0..2], c[3..5] and c[6..8] of the 3x3
+ *                      neighbourhood (presumably the rows above, at and below
+ *                      the output position - confirm against the caller)
+ * count              - number of pixels in the row
+ *
+ * An 8-bit mask records which neighbours differ from the centre c[4]; the
+ * cases included from TextureFilters_hq2x.h turn it into the output pixels. */
+static void hq2x_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  /* c[] index of the neighbour tested for each mask bit (bit k <-> neighbour[k]). */
+  static const unsigned char neighbour[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+
+  for (unsigned i = 0; i < count; ++i) {
+    /* At either end of the row the missing column repeats the centre column. */
+    const int left = (i > 0) ? -1 : 0;
+    const int right = (i + 1 < count) ? 1 : 0;
+
+    uint32 c[9];
+    c[0] = src0[left];
+    c[1] = src0[0];
+    c[2] = src0[right];
+    c[3] = src1[left];
+    c[4] = src1[0];
+    c[5] = src1[right];
+    c[6] = src2[left];
+    c[7] = src2[0];
+    c[8] = src2[right];
+
+    unsigned char mask = 0;
+    for (unsigned k = 0; k < 8; ++k) {
+      if (hq2x_interp_32_diff(c[neighbour[k]], c[4]))
+        mask |= 1 << k;
+    }
+
+/* Vocabulary consumed by the included case table. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR hq2x_interp_32_diff(c[1], c[5])
+#define HQ2X_MDR hq2x_interp_32_diff(c[5], c[7])
+#define HQ2X_MDL hq2x_interp_32_diff(c[7], c[3])
+#define HQ2X_MUL hq2x_interp_32_diff(c[3], c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* One source pixel consumed, two output pixels produced per row. */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/* hq2xS for one row of 32-bit pixels: like hq2x_32_def, but neighbours are
+ * flagged by brightness relative to the contrast of the 3x3 block, and the
+ * four diagonal tests (HQ2X_M*) are fixed to false.
+ *
+ * dst0 / dst1        - upper / lower output row; two pixels per source pixel
+ * src0 / src1 / src2 - rows supplying c[0..2], c[3..5] and c[6..8]; src1
+ *                      holds the centre pixel c[4]
+ * count              - number of pixels in the row */
+static void hq2xS_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  /* c[] index of the neighbour tested for each mask bit (bit k <-> neighbour[k]). */
+  static const unsigned char neighbour[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+
+  for (unsigned i = 0; i < count; ++i) {
+    /* At either end of the row the missing column repeats the centre column. */
+    const int left = (i > 0) ? -1 : 0;
+    const int right = (i + 1 < count) ? 1 : 0;
+
+    uint32 c[9];
+    c[0] = src0[left];
+    c[1] = src0[0];
+    c[2] = src0[right];
+    c[3] = src1[left];
+    c[4] = src1[0];
+    c[5] = src1[right];
+    c[6] = src2[left];
+    c[7] = src2[0];
+    c[8] = src2[right];
+
+    unsigned char mask = 0;
+
+    // hq2xS dynamic edge detection: a fixed colour-distance test against the
+    // centre gives poor results in many cases, so each neighbour is instead
+    // judged against the brightness spread of the whole 3x3 block.
+    int brightArray[9];
+    int minBright = 999999;
+    int maxBright = 0;
+    for (int j = 0; j < 9; ++j) {
+      // Upper five bits of each of the three low bytes, weighted 3 : 3 : 2.
+      const int r = (int)((c[j] >> 16) & 0xF8);
+      const int g = (int)((c[j] >> 8) & 0xF8);
+      const int b = (int)(c[j] & 0xF8);
+      const int bright = 3*r + 3*g + 2*b;
+
+      brightArray[j] = bright;
+      if (maxBright < bright) maxBright = bright;
+      if (minBright > bright) minBright = bright;
+    }
+
+    // A neighbour counts as different when it is further from the centre than
+    // 7/16 of the block's brightness range; near-flat blocks leave mask at 0.
+    const int diffBright = ((maxBright - minBright) * 7) >> 4;
+    if (diffBright > 7) {
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+
+      const int centerBright = brightArray[4];
+      for (unsigned k = 0; k < 8; ++k) {
+        if (ABS(brightArray[neighbour[k]] - centerBright) > diffBright)
+          mask |= 1 << k;
+      }
+    }
+
+/* Vocabulary consumed by the included case table. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_hq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* One source pixel consumed, two output pixels produced per row. */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/***************************************************************************/
+/* LQ2x C implementation */
+
+/*
+* This effect is derived from the hq2x effect made by Maxim Stepin
+*/
+
+#if !_16BPP_HACK
+/* lq2x for one row of 16-bit pixels: every source pixel of the src1 row
+ * becomes a 2x2 block written to dst0 (upper pair) and dst1 (lower pair).
+ * Neighbours are compared with the centre for exact equality; the cases
+ * included from TextureFilters_lq2x.h map the resulting mask to output pixels.
+ *
+ * src0 / src1 / src2 - rows supplying c[0..2], c[3..5] and c[6..8]; src1
+ *                      holds the centre pixel c[4]
+ * count              - number of pixels in the row */
+static void lq2x_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  /* c[] index of the neighbour tested for each mask bit (bit k <-> neighbour[k]). */
+  static const unsigned char neighbour[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+
+  for (unsigned i = 0; i < count; ++i) {
+    /* At either end of the row the missing column repeats the centre column. */
+    const int left = (i > 0) ? -1 : 0;
+    const int right = (i + 1 < count) ? 1 : 0;
+
+    uint16 c[9];
+    c[0] = src0[left];
+    c[1] = src0[0];
+    c[2] = src0[right];
+    c[3] = src1[left];
+    c[4] = src1[0];
+    c[5] = src1[right];
+    c[6] = src2[left];
+    c[7] = src2[0];
+    c[8] = src2[right];
+
+    unsigned char mask = 0;
+    for (unsigned k = 0; k < 8; ++k) {
+      if (c[neighbour[k]] != c[4])
+        mask |= 1 << k;
+    }
+
+/* Vocabulary consumed by the included case table. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR (c[1] != c[5])
+#define HQ2X_MDR (c[5] != c[7])
+#define HQ2X_MDL (c[7] != c[3])
+#define HQ2X_MUL (c[3] != c[1])
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* One source pixel consumed, two output pixels produced per row. */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/* lq2xS, 16-bit pixels: scales one source row to two destination rows.
+ * src0 / src1 / src2 are the rows above, at and below the row being scaled; each of the
+ * count source pixels produces the 2x2 block dst0[0..1] / dst1[0..1]. */
+static void lq2xS_16_def(uint16* dst0, uint16* dst1, const uint16* src0, const uint16* src1, const uint16* src2, unsigned count)
+{
+  for(unsigned i=0;i<count;++i) {
+    unsigned char mask = 0; /* bits are only OR-ed in below, so it has to start cleared */
+    uint16 c[9]; /* 3x3 window, row-major; c[4] is the pixel being scaled */
+
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    if (i>0) { /* left column, or the pixel's own column at the left edge */
+      c[0] = src0[-1];
+      c[3] = src1[-1];
+      c[6] = src2[-1];
+    } else {
+      c[0] = c[1];
+      c[3] = c[4];
+      c[6] = c[7];
+    }
+
+    if (i<count-1) { /* right column, or the pixel's own column at the right edge */
+      c[2] = src0[1];
+      c[5] = src1[1];
+      c[8] = src2[1];
+    } else {
+      c[2] = c[1];
+      c[5] = c[4];
+      c[8] = c[7];
+    }
+
+    // hq2xS dynamic edge detection:
+    // simply comparing the center color against its surroundings will give bad results in many cases,
+    // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
+    int brightArray[9];
+    int maxBright = 0, minBright = 999999;
+    for(int j = 0 ; j < 9 ; j++) {
+      const int b = (int)((c[j] & 0xF8));
+      const int g = (int)((c[j] & 0xF800)) >> 8;
+      const int r = (int)((c[j] & 0xF80000)) >> 16; /* NOTE(review): always 0 for a uint16 pixel; masks look like the 32-bit ones - confirm the 16-bit layout */
+      const int bright = r+r+r + g+g+g + b+b; /* weights 3:3:2 */
+      if(bright > maxBright) maxBright = bright;
+      if(bright < minBright) minBright = bright;
+
+      brightArray[j] = bright;
+    }
+    int diffBright = ((maxBright - minBright) * 7) >> 4;
+    if(diffBright > 7) { /* otherwise no bit is set and the switch sees mask 0 */
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+
+      const int centerBright = brightArray[4];
+      if(ABS(brightArray[0] - centerBright) > diffBright)
+        mask |= 1 << 0;
+      if(ABS(brightArray[1] - centerBright) > diffBright)
+        mask |= 1 << 1;
+      if(ABS(brightArray[2] - centerBright) > diffBright)
+        mask |= 1 << 2;
+      if(ABS(brightArray[3] - centerBright) > diffBright)
+        mask |= 1 << 3;
+      if(ABS(brightArray[5] - centerBright) > diffBright)
+        mask |= 1 << 4;
+      if(ABS(brightArray[6] - centerBright) > diffBright)
+        mask |= 1 << 5;
+      if(ABS(brightArray[7] - centerBright) > diffBright)
+        mask |= 1 << 6;
+      if(ABS(brightArray[8] - centerBright) > diffBright)
+        mask |= 1 << 7;
+    }
+    /* Names used by the included case table; the four HQ2X_M* tests are constant false here. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_16_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_16_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_16_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_16_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_16_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_16_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_16_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_16_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_16_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_16_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_16_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_16_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_16_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    src0 += 1; /* one source pixel consumed ... */
+    src1 += 1;
+    src2 += 1;
+    dst0 += 2; /* ... two destination pixels written per output row */
+    dst1 += 2;
+  }
+}
+#endif /* !_16BPP_HACK */
+
+/* lq2x, 32-bit pixels: scales one source row to two destination rows.
+ * src0 / src1 / src2 are the rows above, at and below the row being scaled; each of the
+ * count source pixels produces the 2x2 block dst0[0..1] / dst1[0..1].
+ * Window columns that would fall outside the row repeat the pixel's own column. */
+static void lq2x_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  /* Window slot compared against the centre for each mask bit (slot 4 is the centre itself). */
+  static const unsigned char slotOfBit[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
+
+  for(unsigned x=0;x<count;++x) {
+    unsigned char mask = 0;
+    uint32 c[9]; /* 3x3 window, row-major: c[0..2] from src0, c[3..5] from src1, c[6..8] from src2 */
+
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    /* Left column: the first pixel of the row has no left neighbour. */
+    if (x==0) {
+      c[0] = c[1];
+      c[3] = c[4];
+      c[6] = c[7];
+    } else {
+      c[0] = src0[-1];
+      c[3] = src1[-1];
+      c[6] = src2[-1];
+    }
+
+    /* Right column: the last pixel of the row has no right neighbour. */
+    if (x>=count-1) {
+      c[2] = c[1];
+      c[5] = c[4];
+      c[8] = c[7];
+    } else {
+      c[2] = src0[1];
+      c[5] = src1[1];
+      c[8] = src2[1];
+    }
+
+    /* Bit n of the mask is set when window slot slotOfBit[n] differs from the centre pixel. */
+    for(int bit = 0 ; bit < 8 ; ++bit) {
+      if (c[slotOfBit[bit]] != c[4])
+        mask |= 1 << bit;
+    }
+
+    /* Output pixels of the 2x2 block, as named by the included case table. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+
+    /* Difference tests between the up/right/down/left neighbours. */
+#define HQ2X_MUR (c[1] != c[5])
+#define HQ2X_MDR (c[5] != c[7])
+#define HQ2X_MDL (c[7] != c[3])
+#define HQ2X_MUL (c[3] != c[1])
+
+    /* Plain copy and blend helpers; their arguments are window slot numbers. */
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+    /* The macros above are only meant for this include; remove them again. */
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    /* Step one pixel right in the source rows and two pixels right in the destination rows. */
+    ++src0;
+    ++src1;
+    ++src2;
+    dst0 += 2;
+    dst1 += 2;
+  }
+}
+
+/* lq2xS, 32-bit pixels: scales one source row to two destination rows.
+ * src0 / src1 / src2 are the rows above, at and below the row being scaled; each of the
+ * count source pixels produces the 2x2 block dst0[0..1] / dst1[0..1]. */
+static void lq2xS_32_def(uint32* dst0, uint32* dst1, const uint32* src0, const uint32* src1, const uint32* src2, unsigned count)
+{
+  for(unsigned i=0;i<count;++i) {
+    unsigned char mask = 0; /* bits are only OR-ed in below, so it has to start cleared */
+    uint32 c[9]; /* 3x3 window, row-major; c[4] is the pixel being scaled */
+
+    c[1] = src0[0];
+    c[4] = src1[0];
+    c[7] = src2[0];
+
+    if (i>0) { /* left column, or the pixel's own column at the left edge */
+      c[0] = src0[-1];
+      c[3] = src1[-1];
+      c[6] = src2[-1];
+    } else {
+      c[0] = c[1];
+      c[3] = c[4];
+      c[6] = c[7];
+    }
+
+    if (i<count-1) { /* right column, or the pixel's own column at the right edge */
+      c[2] = src0[1];
+      c[5] = src1[1];
+      c[8] = src2[1];
+    } else {
+      c[2] = c[1];
+      c[5] = c[4];
+      c[8] = c[7];
+    }
+
+    // hq2xS dynamic edge detection:
+    // simply comparing the center color against its surroundings will give bad results in many cases,
+    // so, instead, compare the center color relative to the max difference in brightness of this 3x3 block
+    int brightArray[9];
+    int maxBright = 0, minBright = 999999;
+    for(int j = 0 ; j < 9 ; j++) {
+      const int b = (int)((c[j] & 0xF8)); /* top five bits of each of the three low bytes */
+      const int g = (int)((c[j] & 0xF800)) >> 8;
+      const int r = (int)((c[j] & 0xF80000)) >> 16;
+      const int bright = r+r+r + g+g+g + b+b; /* weights 3:3:2 */
+      if(bright > maxBright) maxBright = bright;
+      if(bright < minBright) minBright = bright;
+
+      brightArray[j] = bright;
+    }
+    int diffBright = ((maxBright - minBright) * 7) >> 4;
+    if(diffBright > 7) { /* otherwise no bit is set and the switch sees mask 0 */
+#define ABS(x) ((x) < 0 ? -(x) : (x))
+
+      const int centerBright = brightArray[4];
+      if(ABS(brightArray[0] - centerBright) > diffBright)
+        mask |= 1 << 0;
+      if(ABS(brightArray[1] - centerBright) > diffBright)
+        mask |= 1 << 1;
+      if(ABS(brightArray[2] - centerBright) > diffBright)
+        mask |= 1 << 2;
+      if(ABS(brightArray[3] - centerBright) > diffBright)
+        mask |= 1 << 3;
+      if(ABS(brightArray[5] - centerBright) > diffBright)
+        mask |= 1 << 4;
+      if(ABS(brightArray[6] - centerBright) > diffBright)
+        mask |= 1 << 5;
+      if(ABS(brightArray[7] - centerBright) > diffBright)
+        mask |= 1 << 6;
+      if(ABS(brightArray[8] - centerBright) > diffBright)
+        mask |= 1 << 7;
+    }
+    /* Names used by the included case table; the four HQ2X_M* tests are constant false here. */
+#define P0 dst0[0]
+#define P1 dst0[1]
+#define P2 dst1[0]
+#define P3 dst1[1]
+#define HQ2X_MUR false
+#define HQ2X_MDR false
+#define HQ2X_MDL false
+#define HQ2X_MUL false
+#define IC(p0) c[p0]
+#define I11(p0,p1) hq2x_interp_32_11(c[p0], c[p1])
+#define I211(p0,p1,p2) hq2x_interp_32_211(c[p0], c[p1], c[p2])
+#define I31(p0,p1) hq2x_interp_32_31(c[p0], c[p1])
+#define I332(p0,p1,p2) hq2x_interp_32_332(c[p0], c[p1], c[p2])
+#define I431(p0,p1,p2) hq2x_interp_32_431(c[p0], c[p1], c[p2])
+#define I521(p0,p1,p2) hq2x_interp_32_521(c[p0], c[p1], c[p2])
+#define I53(p0,p1) hq2x_interp_32_53(c[p0], c[p1])
+#define I611(p0,p1,p2) hq2x_interp_32_611(c[p0], c[p1], c[p2])
+#define I71(p0,p1) hq2x_interp_32_71(c[p0], c[p1])
+#define I772(p0,p1,p2) hq2x_interp_32_772(c[p0], c[p1], c[p2])
+#define I97(p0,p1) hq2x_interp_32_97(c[p0], c[p1])
+#define I1411(p0,p1,p2) hq2x_interp_32_1411(c[p0], c[p1], c[p2])
+#define I151(p0,p1) hq2x_interp_32_151(c[p0], c[p1])
+
+    switch (mask) {
+#include "TextureFilters_lq2x.h"
+    }
+
+#undef P0
+#undef P1
+#undef P2
+#undef P3
+#undef HQ2X_MUR
+#undef HQ2X_MDR
+#undef HQ2X_MDL
+#undef HQ2X_MUL
+#undef IC
+#undef I11
+#undef I211
+#undef I31
+#undef I332
+#undef I431
+#undef I521
+#undef I53
+#undef I611
+#undef I71
+#undef I772
+#undef I97
+#undef I1411
+#undef I151
+
+    src0 += 1; /* one source pixel consumed ... */
+    src1 += 1;
+    src2 += 1;
+    dst0 += 2; /* ... two destination pixels written per output row */
+    dst1 += 2;
+  }
+}
+
+#if !_16BPP_HACK
+/* Doubles a width x height image of 16-bit pixels with the hq2x filter.
+ * srcPitch / dstPitch are row strides in bytes; each source row produces two destination rows. */
+void hq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint16 *dst0 = (uint16 *)dstPtr;
+  uint16 *dst1 = dst0 + (dstPitch >> 1);
+
+  uint16 *src0 = (uint16 *)srcPtr;
+  uint16 *src1 = src0 + (srcPitch >> 1);
+  uint16 *src2 = src1 + (srcPitch >> 1);
+
+  /* First row: the row above is replaced by the row itself. A one-row image has no row
+   * below either, so src0 is reused there instead of reading past the source buffer. */
+  hq2x_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+
+  /* Interior rows; advancing by dstPitch uint16 elements skips two destination rows. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch;
+    dst1 += dstPitch;
+    hq2x_16_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+  }
+  /* Last row: the row below is replaced by the row itself. */
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  hq2x_16_def(dst0, dst1, src0, src1, src1, width);
+}
+
+
+/* Doubles a width x height image of 16-bit pixels with the hq2xS filter; pitches are in bytes.
+ * NOTE(review): spelled with u8/u16/u32 and an unused third parameter, unlike its siblings. */
+void hq2xS_16(u8 *srcPtr, u32 srcPitch, u8 * /* deltaPtr */,
+              u8 *dstPtr, u32 dstPitch, int width, int height)
+{
+  u16 *dst0 = (u16 *)dstPtr;
+  u16 *dst1 = dst0 + (dstPitch >> 1);
+
+  u16 *src0 = (u16 *)srcPtr;
+  u16 *src1 = src0 + (srcPitch >> 1);
+  u16 *src2 = src1 + (srcPitch >> 1);
+
+  /* First row; a one-row image reuses src0 as the row below and is finished afterwards. */
+  hq2xS_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  /* Interior rows; "count > 0" rather than "count" so a negative count ends the loop. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch;
+    dst1 += dstPitch;
+    hq2xS_16_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+  }
+  dst0 += dstPitch; /* last row: the row below is replaced by the row itself */
+  dst1 += dstPitch;
+  hq2xS_16_def(dst0, dst1, src0, src1, src1, width);
+}
+#endif /* !_16BPP_HACK */
+
+/* Doubles a width x height image of 32-bit pixels with the hq2x filter.
+ * srcPitch / dstPitch are row strides in bytes; each source row produces two destination rows. */
+void hq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+
+  /* First row: the row above is replaced by the row itself. A one-row image has no row
+   * below either, so src0 is reused there instead of reading past the source buffer. */
+  hq2x_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+
+  /* Interior rows; advancing by dstPitch/2 uint32 elements skips two destination rows. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch >> 1;
+    dst1 += dstPitch >> 1;
+    hq2x_32_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+  }
+  /* Last row: the row below is replaced by the row itself. */
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  hq2x_32_def(dst0, dst1, src0, src1, src1, width);
+}
+
+/* Doubles a width x height image of 32-bit pixels with the hq2xS filter; pitches are in bytes. */
+void hq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+
+  /* First row; a one-row image reuses src0 as the row below and is finished afterwards. */
+  hq2xS_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+  /* Interior rows; "count > 0" rather than "count" so a negative count ends the loop. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch >> 1;
+    dst1 += dstPitch >> 1;
+    hq2xS_32_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+  }
+  dst0 += dstPitch >> 1; /* last row: the row below is replaced by the row itself */
+  dst1 += dstPitch >> 1;
+  hq2xS_32_def(dst0, dst1, src0, src1, src1, width);
+}
+
+#if !_16BPP_HACK
+/* Doubles a width x height image of 16-bit pixels with the lq2x filter.
+ * srcPitch / dstPitch are row strides in bytes; each source row produces two destination rows. */
+void lq2x_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint16 *dst0 = (uint16 *)dstPtr;
+  uint16 *dst1 = dst0 + (dstPitch >> 1);
+
+  uint16 *src0 = (uint16 *)srcPtr;
+  uint16 *src1 = src0 + (srcPitch >> 1);
+  uint16 *src2 = src1 + (srcPitch >> 1);
+
+  /* First row: the row above is replaced by the row itself. A one-row image has no row
+   * below either, so src0 is reused there instead of reading past the source buffer. */
+  lq2x_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+
+  /* Interior rows use the same lq2x kernel as the first and last rows. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch;
+    dst1 += dstPitch;
+    lq2x_16_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+  }
+  /* Last row: the row below is replaced by the row itself. */
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  lq2x_16_def(dst0, dst1, src0, src1, src1, width);
+}
+
+/* Doubles a width x height image of 16-bit pixels with the lq2xS filter.
+ * srcPitch / dstPitch are row strides in bytes; each source row produces two destination rows. */
+void lq2xS_16(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint16 *dst0 = (uint16 *)dstPtr;
+  uint16 *dst1 = dst0 + (dstPitch >> 1);
+
+  uint16 *src0 = (uint16 *)srcPtr;
+  uint16 *src1 = src0 + (srcPitch >> 1);
+  uint16 *src2 = src1 + (srcPitch >> 1);
+
+  /* First row: the row above is replaced by the row itself. A one-row image has no row
+   * below either, so src0 is reused there instead of reading past the source buffer. */
+  lq2xS_16_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+
+  /* Interior rows use the same lq2xS kernel as the first and last rows. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch;
+    dst1 += dstPitch;
+    lq2xS_16_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 1;
+  }
+  /* Last row: the row below is replaced by the row itself. */
+  dst0 += dstPitch;
+  dst1 += dstPitch;
+  lq2xS_16_def(dst0, dst1, src0, src1, src1, width);
+}
+#endif /* !_16BPP_HACK */
+
+/* Doubles a width x height image of 32-bit pixels with the lq2x filter.
+ * srcPitch / dstPitch are row strides in bytes; each source row produces two destination rows. */
+void lq2x_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+
+  /* First row: the row above is replaced by the row itself. A one-row image has no row
+   * below either, so src0 is reused there instead of reading past the source buffer. */
+  lq2x_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+
+  /* Interior rows use the same lq2x kernel as the first and last rows. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch >> 1;
+    dst1 += dstPitch >> 1;
+    lq2x_32_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+  }
+  /* Last row: the row below is replaced by the row itself. */
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  lq2x_32_def(dst0, dst1, src0, src1, src1, width);
+}
+
+/* Doubles a width x height image of 32-bit pixels with the lq2xS filter.
+ * srcPitch / dstPitch are row strides in bytes; each source row produces two destination rows. */
+void lq2xS_32(uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height)
+{
+  uint32 *dst0 = (uint32 *)dstPtr;
+  uint32 *dst1 = dst0 + (dstPitch >> 2);
+
+  uint32 *src0 = (uint32 *)srcPtr;
+  uint32 *src1 = src0 + (srcPitch >> 2);
+  uint32 *src2 = src1 + (srcPitch >> 2);
+
+  /* First row: the row above is replaced by the row itself. A one-row image has no row
+   * below either, so src0 is reused there instead of reading past the source buffer. */
+  lq2xS_32_def(dst0, dst1, src0, src0, (height > 1) ? src1 : src0, width);
+  if( height == 1 ) return;
+
+  /* Interior rows use the same lq2xS kernel as the first and last rows. */
+  for(int count = height - 2; count > 0; --count) {
+    dst0 += dstPitch >> 1;
+    dst1 += dstPitch >> 1;
+    lq2xS_32_def(dst0, dst1, src0, src1, src2, width);
+    src0 = src1;
+    src1 = src2;
+    src2 += srcPitch >> 2;
+  }
+  /* Last row: the row below is replaced by the row itself. */
+  dst0 += dstPitch >> 1;
+  dst1 += dstPitch >> 1;
+  lq2xS_32_def(dst0, dst1, src0, src1, src1, width);
+}
+
+/************************************************************************/
+/* hq3x filters                                                         */
+/************************************************************************/
+
+/************************************************************************/
+/* scale2x filters                                                      */
+/************************************************************************/
+
+/************************************************************************/
+/* scale3x filters                                                      */
+/************************************************************************/
+
diff --git a/GLideNHQ/TextureFilters_hq2x.h b/GLideNHQ/TextureFilters_hq2x.h
new file mode 100644
index 00000000..7946323b
--- /dev/null
+++ b/GLideNHQ/TextureFilters_hq2x.h
@@ -0,0 +1,1847 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+case 0 : 
+case 1 : 
+case 4 : 
+case 5 : 
+case 32 : 
+case 33 : 
+case 36 : 
+case 37 : 
+case 128 : 
+case 129 : 
+case 132 : 
+case 133 : 
+case 160 : 
+case 161 : 
+case 164 : 
+case 165 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 2 : 
+case 34 : 
+case 130 : 
+case 162 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 3 : 
+case 35 : 
+case 131 : 
+case 163 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 6 : 
+case 38 : 
+case 134 : 
+case 166 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 7 : 
+case 39 : 
+case 135 : 
+case 167 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I211(4, 3, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 8 : 
+case 12 : 
+case 136 : 
+case 140 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+} break;
+case 9 : 
+case 13 : 
+case 137 : 
+case 141 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+} break;
+case 10 : 
+case 138 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 11 : 
+case 139 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 14 : 
+case 142 : 
+{
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+    P1 = I31(4, 5);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I521(4, 1, 5);
+  }
+} break;
+case 15 : 
+case 143 : 
+{
+  P2 = I31(4, 6);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P1 = I31(4, 5);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I521(4, 1, 5);
+  }
+} break;
+case 16 : 
+case 17 : 
+case 48 : 
+case 49 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+} break;
+case 18 : 
+case 50 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 19 : 
+case 51 : 
+{
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P0 = I31(4, 3);
+    P1 = I31(4, 2);
+  } else {
+    P0 = I521(4, 1, 3);
+    P1 = I332(1, 5, 4);
+  }
+} break;
+case 20 : 
+case 21 : 
+case 52 : 
+case 53 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+} break;
+case 22 : 
+case 54 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 23 : 
+case 55 : 
+{
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P0 = I31(4, 3);
+    P1 = IC(4);
+  } else {
+    P0 = I521(4, 1, 3);
+    P1 = I332(1, 5, 4);
+  }
+} break;
+case 24 : 
+case 66 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 25 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 26 : 
+case 31 : 
+case 95 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 27 : 
+case 75 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 28 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 29 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 30 : 
+case 86 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 40 : 
+case 44 : 
+case 168 : 
+case 172 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 41 : 
+case 45 : 
+case 169 : 
+case 173 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+} break;
+case 42 : 
+case 170 : 
+{
+  P1 = I31(4, 2);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+    P2 = I31(4, 7);
+  } else {
+    P0 = I332(1, 3, 4);
+    P2 = I521(4, 3, 7);
+  }
+} break;
+case 43 : 
+case 171 : 
+{
+  P1 = I31(4, 2);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P2 = I31(4, 7);
+  } else {
+    P0 = I332(1, 3, 4);
+    P2 = I521(4, 3, 7);
+  }
+} break;
+case 46 : 
+case 174 : 
+{
+  P1 = I31(4, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 47 : 
+case 175 : 
+{
+  P1 = I31(4, 5);
+  P2 = I31(4, 7);
+  P3 = I211(4, 5, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 56 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 57 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 58 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 59 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 60 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 61 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+} break;
+case 62 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 63 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 8);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 64 : 
+case 65 : 
+case 68 : 
+case 69 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 67 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 70 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 71 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 8);
+} break;
+case 72 : 
+case 76 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 73 : 
+case 77 : 
+{
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P0 = I31(4, 1);
+    P2 = I31(4, 6);
+  } else {
+    P0 = I521(4, 3, 1);
+    P2 = I332(3, 7, 4);
+  }
+} break;
+case 74 : 
+case 107 : 
+case 123 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 78 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 79 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 80 : 
+case 81 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 82 : 
+case 214 : 
+case 222 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 83 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 84 : 
+case 85 : 
+{
+  P0 = I211(4, 1, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P1 = I31(4, 1);
+    P3 = I31(4, 8);
+  } else {
+    P1 = I521(4, 5, 1);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 87 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 88 : 
+case 248 : 
+case 250 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 89 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 90 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 91 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 92 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 93 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 94 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 96 : 
+case 97 : 
+case 100 : 
+case 101 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 98 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 99 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 102 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 103 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+} break;
+case 104 : 
+case 108 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 105 : 
+case 109 : 
+{
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P0 = I31(4, 1);
+    P2 = IC(4);
+  } else {
+    P0 = I521(4, 3, 1);
+    P2 = I332(3, 7, 4);
+  }
+} break;
+case 106 : 
+case 120 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 110 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 111 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 112 : 
+case 113 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  if (HQ2X_MDR) {
+    P2 = I31(4, 3);
+    P3 = I31(4, 8);
+  } else {
+    P2 = I521(4, 7, 3);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 114 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 115 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 116 : 
+case 117 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 118 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 119 : 
+{
+  P2 = I31(4, 3);
+  P3 = I31(4, 8);
+  if (HQ2X_MUR) {
+    P0 = I31(4, 3);
+    P1 = IC(4);
+  } else {
+    P0 = I521(4, 1, 3);
+    P1 = I332(1, 5, 4);
+  }
+} break;
+case 121 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+} break;
+case 122 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = I31(4, 8);
+  } else {
+    P3 = I611(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 124 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+} break;
+case 125 : 
+{
+  P1 = I31(4, 1);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P0 = I31(4, 1);
+    P2 = IC(4);
+  } else {
+    P0 = I521(4, 3, 1);
+    P2 = I332(3, 7, 4);
+  }
+} break;
+case 126 : 
+{
+  P0 = I31(4, 0);
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 127 : 
+{
+  P3 = I31(4, 8);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 144 : 
+case 145 : 
+case 176 : 
+case 177 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+} break;
+case 146 : 
+case 178 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+    P3 = I31(4, 7);
+  } else {
+    P1 = I332(1, 5, 4);
+    P3 = I521(4, 5, 7);
+  }
+} break;
+case 147 : 
+case 179 : 
+{
+  P0 = I31(4, 3);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 148 : 
+case 149 : 
+case 180 : 
+case 181 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+} break;
+case 150 : 
+case 182 : 
+{
+  P0 = I31(4, 0);
+  P2 = I211(4, 3, 7);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+    P3 = I31(4, 7);
+  } else {
+    P1 = I332(1, 5, 4);
+    P3 = I521(4, 5, 7);
+  }
+} break;
+case 151 : 
+case 183 : 
+{
+  P0 = I31(4, 3);
+  P2 = I211(4, 3, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 152 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 153 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 154 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 155 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 156 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 157 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+} break;
+case 158 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 159 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 184 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 185 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 186 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 187 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P2 = I31(4, 7);
+  } else {
+    P0 = I332(1, 3, 4);
+    P2 = I521(4, 3, 7);
+  }
+} break;
+case 188 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 189 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+} break;
+case 190 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 7);
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+    P3 = I31(4, 7);
+  } else {
+    P1 = I332(1, 5, 4);
+    P3 = I521(4, 5, 7);
+  }
+} break;
+case 191 : 
+{
+  P2 = I31(4, 7);
+  P3 = I31(4, 7);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 192 : 
+case 193 : 
+case 196 : 
+case 197 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 194 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 195 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 198 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 199 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+} break;
+case 200 : 
+case 204 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+    P3 = I31(4, 5);
+  } else {
+    P2 = I332(3, 7, 4);
+    P3 = I521(4, 7, 5);
+  }
+} break;
+case 201 : 
+case 205 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+} break;
+case 202 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 203 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 206 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 207 : 
+{
+  P2 = I31(4, 6);
+  P3 = I31(4, 5);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P1 = I31(4, 5);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I521(4, 1, 5);
+  }
+} break;
+case 208 : 
+case 209 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 210 : 
+case 216 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 211 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 212 : 
+case 213 : 
+{
+  P0 = I211(4, 1, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P1 = I31(4, 1);
+    P3 = IC(4);
+  } else {
+    P1 = I521(4, 5, 1);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 215 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 217 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 218 : 
+{
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 219 : 
+{
+  P1 = I31(4, 2);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 220 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = I31(4, 6);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 221 : 
+{
+  P0 = I31(4, 1);
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P1 = I31(4, 1);
+    P3 = IC(4);
+  } else {
+    P1 = I521(4, 5, 1);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 223 : 
+{
+  P2 = I31(4, 6);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 224 : 
+case 225 : 
+case 228 : 
+case 229 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I211(4, 1, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 226 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 227 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 230 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 231 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 5);
+  P2 = I31(4, 3);
+  P3 = I31(4, 5);
+} break;
+case 232 : 
+case 236 : 
+{
+  P0 = I31(4, 0);
+  P1 = I211(4, 1, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+    P3 = I31(4, 5);
+  } else {
+    P2 = I332(3, 7, 4);
+    P3 = I521(4, 7, 5);
+  }
+} break;
+case 233 : 
+case 237 : 
+{
+  P0 = I31(4, 1);
+  P1 = I211(4, 1, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+} break;
+case 234 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = I31(4, 0);
+  } else {
+    P0 = I611(4, 1, 3);
+  }
+} break;
+case 235 : 
+{
+  P1 = I31(4, 2);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 238 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+    P3 = I31(4, 5);
+  } else {
+    P2 = I332(3, 7, 4);
+    P3 = I521(4, 7, 5);
+  }
+} break;
+case 239 : 
+{
+  P1 = I31(4, 5);
+  P3 = I31(4, 5);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 240 : 
+case 241 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 2);
+  if (HQ2X_MDR) {
+    P2 = I31(4, 3);
+    P3 = IC(4);
+  } else {
+    P2 = I521(4, 7, 3);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 242 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = I31(4, 2);
+  } else {
+    P1 = I611(4, 1, 5);
+  }
+} break;
+case 243 : 
+{
+  P0 = I31(4, 3);
+  P1 = I31(4, 2);
+  if (HQ2X_MDR) {
+    P2 = I31(4, 3);
+    P3 = IC(4);
+  } else {
+    P2 = I521(4, 7, 3);
+    P3 = I332(5, 7, 4);
+  }
+} break;
+case 244 : 
+case 245 : 
+{
+  P0 = I211(4, 1, 3);
+  P1 = I31(4, 1);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+} break;
+case 246 : 
+{
+  P0 = I31(4, 0);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 247 : 
+{
+  P0 = I31(4, 3);
+  P2 = I31(4, 3);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 249 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+} break;
+case 251 : 
+{
+  P1 = I31(4, 2);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 252 : 
+{
+  P0 = I31(4, 0);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+} break;
+case 253 : 
+{
+  P0 = I31(4, 1);
+  P1 = I31(4, 1);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+} break;
+case 254 : 
+{
+  P0 = I31(4, 0);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 255 : 
+{
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
diff --git a/GLideNHQ/TextureFilters_hq4x.cpp b/GLideNHQ/TextureFilters_hq4x.cpp
new file mode 100644
index 00000000..89c14ea2
--- /dev/null
+++ b/GLideNHQ/TextureFilters_hq4x.cpp
@@ -0,0 +1,892 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*  Based on Maxim Stepin and Rice1964 hq4x code */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include "TextureFilters.h"
+
+#if !_16BPP_HACK
+/* Lookup table indexed by the low 12 bits (the RGB444 part) of an ARGB4444
+ * pixel; entries are consumed as packed YUV words (see Ymask/Umask/Vmask).
+ * NOTE(review): nothing in the visible part of this file fills the table -
+ * confirm it is initialised before hq4x_4444() runs. */
+static uint32 RGB444toYUV[4096];
+/* val = ARGB4444; the alpha nibble is masked off so the index stays in 0..4095.
+ * The argument is parenthesised so an expression argument (e.g. "a | b") is
+ * masked as a whole instead of indexing outside the table. */
+#define RGB444toYUV(val) RGB444toYUV[(val) & 0x0FFF]
+
+/*inline static uint32 RGB444toYUV(uint32 val)
+{
+  uint32 r, g, b, Y, u, v;
+
+  r = (val & 0x0F00) >> 4;
+  g = (val & 0x00F0);
+  b = val & 0x000F;
+  r |= r >> 4;
+  g |= g >> 4;
+  b |= b << 4;
+
+  Y = (r + g + b) >> 2;
+  u = 128 + ((r - b) >> 2);
+  v = 128 + ((2*g - r - b)>>3);
+
+  return ((Y << 16) | (u << 8) | v);
+}*/
+
+/*
+ * Convert an RGB555 pixel (bit 15 is ignored) to a packed YUV word.
+ *
+ * Each 5-bit channel is widened to 8 bits by replicating its top bits, then
+ *   Y = (r + g + b) / 4
+ *   u = 128 + (r - b) / 4
+ *   v = 128 + (2g - r - b) / 8
+ * Returns (Y << 16) | (u << 8) | v, the layout selected by Ymask/Umask/Vmask.
+ */
+static uint32 RGB555toYUV(uint32 val)
+{
+  uint32 r, g, b, Y, u, v;
+
+  r = (val & 0x7C00) >> 7;
+  g = (val & 0x03E0) >> 2;
+  b = (val & 0x001F) << 3;
+  r |= r >> 5;
+  g |= g >> 5;
+  b |= b >> 5;
+
+  Y = (r + g + b) >> 2;
+  /* Add the bias before shifting, as RGB888toYUV does.  The operands are
+   * unsigned, so "128 + ((2*g - r - b) >> 3)" wrapped whenever the difference
+   * was negative and left a stray bit 29 set in the returned word. */
+  u = (0x00000200 + r - b) >> 2;
+  v = (0x00000400 + (g << 1) - r - b) >> 3;
+
+  return ((Y << 16) | (u << 8) | v);
+}
+
+/*
+ * Convert an RGB565 pixel to a packed YUV word.
+ *
+ * Red and blue (5 bits) and green (6 bits) are widened to 8 bits by
+ * replicating their top bits, then
+ *   Y = (r + g + b) / 4
+ *   u = 128 + (r - b) / 4
+ *   v = 128 + (2g - r - b) / 8
+ * Returns (Y << 16) | (u << 8) | v, the layout selected by Ymask/Umask/Vmask.
+ */
+static uint32 RGB565toYUV(uint32 val)
+{
+  uint32 r, g, b, Y, u, v;
+
+  r = (val & 0xF800) >> 8;
+  g = (val & 0x07E0) >> 3;
+  b = (val & 0x001F) << 3;
+  r |= r >> 5;
+  g |= g >> 6;
+  b |= b >> 5;
+
+  Y = (r + g + b) >> 2;
+  /* Add the bias before shifting, as RGB888toYUV does.  The operands are
+   * unsigned, so "128 + ((2*g - r - b) >> 3)" wrapped whenever the difference
+   * was negative and left a stray bit 29 set in the returned word. */
+  u = (0x00000200 + r - b) >> 2;
+  v = (0x00000400 + (g << 1) - r - b) >> 3;
+
+  return ((Y << 16) | (u << 8) | v);
+}
+#endif /* !_16BPP_HACK */
+
+/*
+ * Convert a 32-bit pixel (red in bits 16-23, green in bits 8-15, blue in
+ * bits 0-7; the top byte is ignored) to a packed YUV word.
+ *
+ *   Y = (r + g + b) / 4
+ *   u = (512 + r - b) / 4
+ *   v = (1024 + 2g - r - b) / 8
+ *
+ * The biases are added before shifting, so the unsigned sums never wrap.
+ * Returns (Y << 16) | (u << 8) | v.
+ */
+static uint32 RGB888toYUV(uint32 val)
+{
+#if 0
+  /* Disabled x86 inline-assembly variant, kept for reference only. */
+  uint32 Yuv;
+
+  __asm {
+    mov eax, dword ptr [val];
+    mov ebx, eax;
+    mov ecx, eax;
+    and ebx, 0x000000ff; // b
+    and eax, 0x00ff0000; // r
+    and ecx, 0x0000ff00; // g
+    shl ebx, 14;
+    shr eax, 2;
+    shl ecx, 6;
+    mov edx, ebx;
+    add edx, eax;
+    add edx, ecx;
+    and edx, 0xffff0000;
+
+    sub eax, ebx;
+    add eax, 0x00800000;
+    shr eax, 8;
+    or  edx, eax;
+    sub eax, 0x00800000;
+    and edx, 0xffffff00;
+
+    add ecx, 0x00800000;
+    shr ecx, 5;
+    shr ebx, 7;
+    add eax, ebx;
+    sub ecx, eax;
+    shr ecx, 11;
+    or  edx, ecx;
+
+    mov dword ptr [Yuv], edx;
+  }
+
+  return Yuv;
+#else
+  /* Pull the three colour bytes out of the pixel. */
+  const uint32 red   = (val >> 16) & 0x000000ff;
+  const uint32 green = (val >> 8) & 0x000000ff;
+  const uint32 blue  = val & 0x000000ff;
+
+  /* Build each lane already shifted into its final position. */
+  const uint32 yLane = ((red + green + blue) >> 2) << 16;
+  const uint32 uLane = ((0x00000200 + red - blue) >> 2) << 8;
+  const uint32 vLane = (0x00000400 + (green << 1) - red - blue) >> 3;
+
+  return yLane | uLane | vLane;
+#endif
+}
+
+/* Byte lanes of the packed YUV word returned by the RGB*toYUV converters. */
+#define Ymask 0x00FF0000
+#define Umask 0x0000FF00
+#define Vmask 0x000000FF
+/* Per-lane thresholds, expressed in lane position: two pixels are treated as
+ * different when the absolute difference in any lane exceeds its threshold. */
+#define trY 0x00300000 // 0x30 (48) in the Y lane
+#define trU 0x00000700 // 0x07 in the U lane
+#define trV 0x00000006 // 0x06 in the V lane
+
+/*
+ * Generates hq4x_Interp1_<n>(pc, p1, p2): stores the per-channel blend
+ * (3*p1 + p2) / 4 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * The pixel is processed as two sets of alternating channels (MASK_1_3 and
+ * MASK_SHIFT_2_4) so each weighted sum has spare bits above its channel; the
+ * final mask discards whatever spilled into those spare bits.
+ */
+#define HQ4X_INTERP1(n, b) \
+static void hq4x_Interp1_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /* *((uint##b*)pc) = (p1*3+p2) >> 2; */ \
+  *((uint##b*)pc) = INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*3 + INTERP_##n##_MASK_1_3(p2)) / 4) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*3 + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 4 ); \
+}
+
+/*
+ * Generates hq4x_Interp2_<n>(pc, p1, p2, p3): stores the per-channel blend
+ * (2*p1 + p2 + p3) / 4 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * Alternating channels are blended separately (MASK_1_3, then MASK_SHIFT_2_4)
+ * and recombined with MASK_SHIFTBACK_2_4.
+ */
+#define HQ4X_INTERP2(n, b) \
+static void hq4x_Interp2_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
+{ \
+  /**((uint##b*)pc) = (p1*2+p2+p3) >> 2;*/ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*2 + INTERP_##n##_MASK_1_3(p2) + INTERP_##n##_MASK_1_3(p3)) / 4) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*2 + INTERP_##n##_MASK_SHIFT_2_4(p2) + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 4); \
+}
+
+/*
+ * Generates hq4x_Interp3_<n>(pc, p1, p2): stores the per-channel blend
+ * (7*p1 + p2) / 8 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * Alternating channels are blended separately (MASK_1_3, then MASK_SHIFT_2_4)
+ * and recombined with MASK_SHIFTBACK_2_4.
+ */
+#define HQ4X_INTERP3(n, b) \
+static void hq4x_Interp3_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /**((uint##b*)pc) = (p1*7+p2)/8;*/ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*7 + INTERP_##n##_MASK_1_3(p2)) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*7 + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 8); \
+}
+
+/*
+ * Generates hq4x_Interp5_<n>(pc, p1, p2): stores the per-channel average
+ * (p1 + p2) / 2 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * Alternating channels are averaged separately (MASK_1_3, then MASK_SHIFT_2_4)
+ * and recombined with MASK_SHIFTBACK_2_4.
+ */
+#define HQ4X_INTERP5(n, b) \
+static void hq4x_Interp5_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /**((uint##b*)pc) = (p1+p2) >> 1;*/ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1) + INTERP_##n##_MASK_1_3(p2)) / 2) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1) + INTERP_##n##_MASK_SHIFT_2_4(p2)) / 2); \
+}
+
+/*
+ * Generates hq4x_Interp6_<n>(pc, p1, p2, p3): stores the per-channel blend
+ * (5*p1 + 2*p2 + p3) / 8 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * Alternating channels are blended separately (MASK_1_3, then MASK_SHIFT_2_4)
+ * and recombined with MASK_SHIFTBACK_2_4.
+ */
+#define HQ4X_INTERP6(n, b) \
+static void hq4x_Interp6_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
+{ \
+  /**((uint##b*)pc) = (p1*5+p2*2+p3)/8;*/ \
+  *((uint##b*)pc) =  INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*5 + INTERP_##n##_MASK_1_3(p2)*2 + INTERP_##n##_MASK_1_3(p3)) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*5 + INTERP_##n##_MASK_SHIFT_2_4(p2)*2 + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 8); \
+}
+
+/*
+ * Generates hq4x_Interp7_<n>(pc, p1, p2, p3): stores the per-channel blend
+ * (6*p1 + p2 + p3) / 8 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * Alternating channels are blended separately (MASK_1_3, then MASK_SHIFT_2_4)
+ * and recombined with MASK_SHIFTBACK_2_4.
+ */
+#define HQ4X_INTERP7(n, b) \
+static void hq4x_Interp7_##n (uint8 * pc, uint##b p1, uint##b p2, uint##b p3) \
+{ \
+  /**((uint##b*)pc) = (p1*6+p2+p3)/8;*/ \
+  *((uint##b*)pc) =   INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*6 + INTERP_##n##_MASK_1_3(p2) + INTERP_##n##_MASK_1_3(p3)) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*6 + INTERP_##n##_MASK_SHIFT_2_4(p2) + INTERP_##n##_MASK_SHIFT_2_4(p3)) / 8); \
+}
+
+/*
+ * Generates hq4x_Interp8_<n>(pc, p1, p2): stores the per-channel blend
+ * (5*p1 + 3*p2) / 8 at pc as a uint<b>.
+ *   n - pixel format suffix; selects the INTERP_<n>_MASK_* macros
+ *   b - pixel width in bits (16 or 32)
+ * Alternating channels are blended separately (MASK_1_3, then MASK_SHIFT_2_4)
+ * and recombined with MASK_SHIFTBACK_2_4.
+ */
+#define HQ4X_INTERP8(n, b) \
+static void hq4x_Interp8_##n (uint8 * pc, uint##b p1, uint##b p2) \
+{ \
+  /**((uint##b*)pc) = (p1*5+p2*3)/8;*/ \
+  *((uint##b*)pc) =   INTERP_##n##_MASK_1_3((INTERP_##n##_MASK_1_3(p1)*5 + INTERP_##n##_MASK_1_3(p2)*3) / 8) \
+    | INTERP_##n##_MASK_SHIFTBACK_2_4((INTERP_##n##_MASK_SHIFT_2_4(p1)*5 + INTERP_##n##_MASK_SHIFT_2_4(p2)*3) / 8); \
+}
+
+#if !_16BPP_HACK
+/*
+ * ARGB4444 channel split used by the hq4x_Interp*_4444 helpers:
+ *   MASK_1_3       keeps the nibbles at bits 0-3 and 8-11
+ *   MASK_SHIFT_2_4 keeps the nibbles at bits 4-7 and 12-15, moved down 4 bits
+ *   SHIFTBACK_2_4  masks a blended value and moves it back up 4 bits
+ * The parameter is parenthesised so any expression can be passed safely.
+ */
+#define INTERP_4444_MASK_1_3(v)           ((v) & 0x0F0F)
+#define INTERP_4444_MASK_SHIFT_2_4(v)     (((v) & 0xF0F0) >> 4)
+#define INTERP_4444_MASK_SHIFTBACK_2_4(v) (INTERP_4444_MASK_1_3(v) << 4)
+/* Instantiate the 16-bit blend helpers hq4x_Interp{1,2,3,5,6,7,8}_4444. */
+HQ4X_INTERP1(4444, 16)
+HQ4X_INTERP2(4444, 16)
+HQ4X_INTERP3(4444, 16)
+HQ4X_INTERP5(4444, 16)
+HQ4X_INTERP6(4444, 16)
+HQ4X_INTERP7(4444, 16)
+HQ4X_INTERP8(4444, 16)
+
+/*
+ * ARGB1555 channel split used by the hq4x_Interp*_1555 helpers:
+ *   MASK_1_3       keeps bits 0-4 (blue) and bits 10-14 (red)
+ *   MASK_SHIFT_2_4 keeps bits 5-9 (green) and bit 15, moved down 5 bits
+ *   SHIFTBACK_2_4  masks a blended value and moves it back up 5 bits
+ * The parameter is parenthesised so any expression can be passed safely.
+ */
+#define INTERP_1555_MASK_1_3(v)           ((v) & 0x7C1F)
+#define INTERP_1555_MASK_SHIFT_2_4(v)     (((v) & 0x83E0) >> 5)
+#define INTERP_1555_MASK_SHIFTBACK_2_4(v) (INTERP_1555_MASK_1_3(v) << 5)
+/* Instantiate the 16-bit blend helpers hq4x_Interp{1,2,3,5,6,7,8}_1555. */
+HQ4X_INTERP1(1555, 16)
+HQ4X_INTERP2(1555, 16)
+HQ4X_INTERP3(1555, 16)
+HQ4X_INTERP5(1555, 16)
+HQ4X_INTERP6(1555, 16)
+HQ4X_INTERP7(1555, 16)
+HQ4X_INTERP8(1555, 16)
+
+/*
+ * RGB565 channel split used by the hq4x_Interp*_565 helpers:
+ *   MASK_1_3       keeps bits 0-4 (blue) and bits 11-15 (red)
+ *   MASK_SHIFT_2_4 keeps bits 5-10 (green), moved down 5 bits
+ *   SHIFTBACK_2_4  masks a blended value and moves it back up 5 bits
+ * The parameter is parenthesised so any expression can be passed safely.
+ */
+#define INTERP_565_MASK_1_3(v)           ((v) & 0xF81F)
+#define INTERP_565_MASK_SHIFT_2_4(v)     (((v) & 0x7E0) >> 5)
+#define INTERP_565_MASK_SHIFTBACK_2_4(v) (INTERP_565_MASK_1_3(v) << 5)
+/* Instantiate the 16-bit blend helpers hq4x_Interp{1,2,3,5,6,7,8}_565. */
+HQ4X_INTERP1(565, 16)
+HQ4X_INTERP2(565, 16)
+HQ4X_INTERP3(565, 16)
+HQ4X_INTERP5(565, 16)
+HQ4X_INTERP6(565, 16)
+HQ4X_INTERP7(565, 16)
+HQ4X_INTERP8(565, 16)
+#endif /* !_16BPP_HACK */
+
+/*
+ * 32-bit (four 8-bit channels) split used by the hq4x_Interp*_8888 helpers:
+ *   MASK_1_3       keeps the bytes at bits 0-7 and 16-23
+ *   MASK_SHIFT_2_4 keeps the bytes at bits 8-15 and 24-31, moved down 8 bits
+ *   SHIFTBACK_2_4  masks a blended value and moves it back up 8 bits
+ * The parameter is parenthesised so any expression can be passed safely.
+ */
+#define INTERP_8888_MASK_1_3(v)           ((v) & 0x00FF00FF)
+#define INTERP_8888_MASK_SHIFT_2_4(v)     (((v) & 0xFF00FF00) >> 8)
+#define INTERP_8888_MASK_SHIFTBACK_2_4(v) (INTERP_8888_MASK_1_3(v) << 8)
+/* Instantiate the 32-bit blend helpers hq4x_Interp{1,2,3,5,6,7,8}_8888. */
+HQ4X_INTERP1(8888, 32)
+HQ4X_INTERP2(8888, 32)
+HQ4X_INTERP3(8888, 32)
+HQ4X_INTERP5(8888, 32)
+HQ4X_INTERP6(8888, 32)
+HQ4X_INTERP7(8888, 32)
+HQ4X_INTERP8(8888, 32)
+
+/*
+ * Output row 0 of the 4x4 block written for each source pixel.
+ * PIXELrc_<op> stores to output row r (r * BpL bytes from pOut) and output
+ * column c (0, BPP, BPP2 or BPP3 bytes).  c[1]..c[9] is the 3x3 source
+ * neighbourhood with c[5] at the centre.  <op> selects the hq4x_InterpN
+ * blend and its operands; _0 copies c[5] unchanged.
+ * The _0 stores copy exactly sizeof(c[5]) bytes.  The earlier "*((int*)...) ="
+ * form always wrote 4 bytes, which for 16-bit pixels overwrote the next
+ * output pixel (and ran past the buffer at its very end).
+ */
+#define PIXEL00_0     memcpy(pOut, &c[5], sizeof(c[5]));
+#define PIXEL00_11    hq4x_Interp1(pOut, c[5], c[4]);
+#define PIXEL00_12    hq4x_Interp1(pOut, c[5], c[2]);
+#define PIXEL00_20    hq4x_Interp2(pOut, c[5], c[2], c[4]);
+#define PIXEL00_50    hq4x_Interp5(pOut, c[2], c[4]);
+#define PIXEL00_80    hq4x_Interp8(pOut, c[5], c[1]);
+#define PIXEL00_81    hq4x_Interp8(pOut, c[5], c[4]);
+#define PIXEL00_82    hq4x_Interp8(pOut, c[5], c[2]);
+#define PIXEL01_0     memcpy(pOut+BPP, &c[5], sizeof(c[5]));
+#define PIXEL01_10    hq4x_Interp1(pOut+BPP, c[5], c[1]);
+#define PIXEL01_12    hq4x_Interp1(pOut+BPP, c[5], c[2]);
+#define PIXEL01_14    hq4x_Interp1(pOut+BPP, c[2], c[5]);
+#define PIXEL01_21    hq4x_Interp2(pOut+BPP, c[2], c[5], c[4]);
+#define PIXEL01_31    hq4x_Interp3(pOut+BPP, c[5], c[4]);
+#define PIXEL01_50    hq4x_Interp5(pOut+BPP, c[2], c[5]);
+#define PIXEL01_60    hq4x_Interp6(pOut+BPP, c[5], c[2], c[4]);
+#define PIXEL01_61    hq4x_Interp6(pOut+BPP, c[5], c[2], c[1]);
+#define PIXEL01_82    hq4x_Interp8(pOut+BPP, c[5], c[2]);
+#define PIXEL01_83    hq4x_Interp8(pOut+BPP, c[2], c[4]);
+#define PIXEL02_0     memcpy(pOut+BPP2, &c[5], sizeof(c[5]));
+#define PIXEL02_10    hq4x_Interp1(pOut+BPP2, c[5], c[3]);
+#define PIXEL02_11    hq4x_Interp1(pOut+BPP2, c[5], c[2]);
+#define PIXEL02_13    hq4x_Interp1(pOut+BPP2, c[2], c[5]);
+#define PIXEL02_21    hq4x_Interp2(pOut+BPP2, c[2], c[5], c[6]);
+#define PIXEL02_32    hq4x_Interp3(pOut+BPP2, c[5], c[6]);
+#define PIXEL02_50    hq4x_Interp5(pOut+BPP2, c[2], c[5]);
+#define PIXEL02_60    hq4x_Interp6(pOut+BPP2, c[5], c[2], c[6]);
+#define PIXEL02_61    hq4x_Interp6(pOut+BPP2, c[5], c[2], c[3]);
+#define PIXEL02_81    hq4x_Interp8(pOut+BPP2, c[5], c[2]);
+#define PIXEL02_83    hq4x_Interp8(pOut+BPP2, c[2], c[6]);
+#define PIXEL03_0     memcpy(pOut+BPP3, &c[5], sizeof(c[5]));
+#define PIXEL03_11    hq4x_Interp1(pOut+BPP3, c[5], c[2]);
+#define PIXEL03_12    hq4x_Interp1(pOut+BPP3, c[5], c[6]);
+#define PIXEL03_20    hq4x_Interp2(pOut+BPP3, c[5], c[2], c[6]);
+#define PIXEL03_50    hq4x_Interp5(pOut+BPP3, c[2], c[6]);
+#define PIXEL03_80    hq4x_Interp8(pOut+BPP3, c[5], c[3]);
+#define PIXEL03_81    hq4x_Interp8(pOut+BPP3, c[5], c[2]);
+#define PIXEL03_82    hq4x_Interp8(pOut+BPP3, c[5], c[6]);
+#define PIXEL10_0     *((int*)(pOut+BpL)) = c[5];
+#define PIXEL10_10    hq4x_Interp1(pOut+BpL, c[5], c[1]);
+#define PIXEL10_11    hq4x_Interp1(pOut+BpL, c[5], c[4]);
+#define PIXEL10_13    hq4x_Interp1(pOut+BpL, c[4], c[5]);
+#define PIXEL10_21    hq4x_Interp2(pOut+BpL, c[4], c[5], c[2]);
+#define PIXEL10_32    hq4x_Interp3(pOut+BpL, c[5], c[2]);
+#define PIXEL10_50    hq4x_Interp5(pOut+BpL, c[4], c[5]);
+#define PIXEL10_60    hq4x_Interp6(pOut+BpL, c[5], c[4], c[2]);
+#define PIXEL10_61    hq4x_Interp6(pOut+BpL, c[5], c[4], c[1]);
+#define PIXEL10_81    hq4x_Interp8(pOut+BpL, c[5], c[4]);
+#define PIXEL10_83    hq4x_Interp8(pOut+BpL, c[4], c[2]);
+#define PIXEL11_0     *((int*)(pOut+BpL+BPP)) = c[5];
+#define PIXEL11_30    hq4x_Interp3(pOut+BpL+BPP, c[5], c[1]);
+#define PIXEL11_31    hq4x_Interp3(pOut+BpL+BPP, c[5], c[4]);
+#define PIXEL11_32    hq4x_Interp3(pOut+BpL+BPP, c[5], c[2]);
+#define PIXEL11_70    hq4x_Interp7(pOut+BpL+BPP, c[5], c[4], c[2]);
+#define PIXEL12_0     *((int*)(pOut+BpL+BPP2)) = c[5];
+#define PIXEL12_30    hq4x_Interp3(pOut+BpL+BPP2, c[5], c[3]);
+#define PIXEL12_31    hq4x_Interp3(pOut+BpL+BPP2, c[5], c[2]);
+#define PIXEL12_32    hq4x_Interp3(pOut+BpL+BPP2, c[5], c[6]);
+#define PIXEL12_70    hq4x_Interp7(pOut+BpL+BPP2, c[5], c[6], c[2]);
+#define PIXEL13_0     *((int*)(pOut+BpL+BPP3)) = c[5];
+#define PIXEL13_10    hq4x_Interp1(pOut+BpL+BPP3, c[5], c[3]);
+#define PIXEL13_12    hq4x_Interp1(pOut+BpL+BPP3, c[5], c[6]);
+#define PIXEL13_14    hq4x_Interp1(pOut+BpL+BPP3, c[6], c[5]);
+#define PIXEL13_21    hq4x_Interp2(pOut+BpL+BPP3, c[6], c[5], c[2]);
+#define PIXEL13_31    hq4x_Interp3(pOut+BpL+BPP3, c[5], c[2]);
+#define PIXEL13_50    hq4x_Interp5(pOut+BpL+BPP3, c[6], c[5]);
+#define PIXEL13_60    hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[2]);
+#define PIXEL13_61    hq4x_Interp6(pOut+BpL+BPP3, c[5], c[6], c[3]);
+#define PIXEL13_82    hq4x_Interp8(pOut+BpL+BPP3, c[5], c[6]);
+#define PIXEL13_83    hq4x_Interp8(pOut+BpL+BPP3, c[6], c[2]);
+/*
+ * Output row 2 (pOut + 2*BpL) of the 4x4 block written for each source pixel.
+ * PIXELrc_<op> stores to output row r and output column c (0, BPP, BPP2 or
+ * BPP3 bytes).  c[1]..c[9] is the 3x3 source neighbourhood with c[5] at the
+ * centre.  <op> selects the hq4x_InterpN blend and its operands; _0 copies
+ * c[5] unchanged.
+ * The _0 stores copy exactly sizeof(c[5]) bytes.  The earlier "*((int*)...) ="
+ * form always wrote 4 bytes, which for 16-bit pixels overwrote the next
+ * output pixel.
+ */
+#define PIXEL20_0     memcpy(pOut+BpL+BpL, &c[5], sizeof(c[5]));
+#define PIXEL20_10    hq4x_Interp1(pOut+BpL+BpL, c[5], c[7]);
+#define PIXEL20_12    hq4x_Interp1(pOut+BpL+BpL, c[5], c[4]);
+#define PIXEL20_14    hq4x_Interp1(pOut+BpL+BpL, c[4], c[5]);
+#define PIXEL20_21    hq4x_Interp2(pOut+BpL+BpL, c[4], c[5], c[8]);
+#define PIXEL20_31    hq4x_Interp3(pOut+BpL+BpL, c[5], c[8]);
+#define PIXEL20_50    hq4x_Interp5(pOut+BpL+BpL, c[4], c[5]);
+#define PIXEL20_60    hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[8]);
+#define PIXEL20_61    hq4x_Interp6(pOut+BpL+BpL, c[5], c[4], c[7]);
+#define PIXEL20_82    hq4x_Interp8(pOut+BpL+BpL, c[5], c[4]);
+#define PIXEL20_83    hq4x_Interp8(pOut+BpL+BpL, c[4], c[8]);
+#define PIXEL21_0     memcpy(pOut+BpL+BpL+BPP, &c[5], sizeof(c[5]));
+#define PIXEL21_30    hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[7]);
+#define PIXEL21_31    hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[8]);
+#define PIXEL21_32    hq4x_Interp3(pOut+BpL+BpL+BPP, c[5], c[4]);
+#define PIXEL21_70    hq4x_Interp7(pOut+BpL+BpL+BPP, c[5], c[4], c[8]);
+#define PIXEL22_0     memcpy(pOut+BpL+BpL+BPP2, &c[5], sizeof(c[5]));
+#define PIXEL22_30    hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[9]);
+#define PIXEL22_31    hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[6]);
+#define PIXEL22_32    hq4x_Interp3(pOut+BpL+BpL+BPP2, c[5], c[8]);
+#define PIXEL22_70    hq4x_Interp7(pOut+BpL+BpL+BPP2, c[5], c[6], c[8]);
+#define PIXEL23_0     memcpy(pOut+BpL+BpL+BPP3, &c[5], sizeof(c[5]));
+#define PIXEL23_10    hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[9]);
+#define PIXEL23_11    hq4x_Interp1(pOut+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL23_13    hq4x_Interp1(pOut+BpL+BpL+BPP3, c[6], c[5]);
+#define PIXEL23_21    hq4x_Interp2(pOut+BpL+BpL+BPP3, c[6], c[5], c[8]);
+#define PIXEL23_32    hq4x_Interp3(pOut+BpL+BpL+BPP3, c[5], c[8]);
+#define PIXEL23_50    hq4x_Interp5(pOut+BpL+BpL+BPP3, c[6], c[5]);
+#define PIXEL23_60    hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[8]);
+#define PIXEL23_61    hq4x_Interp6(pOut+BpL+BpL+BPP3, c[5], c[6], c[9]);
+#define PIXEL23_81    hq4x_Interp8(pOut+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL23_83    hq4x_Interp8(pOut+BpL+BpL+BPP3, c[6], c[8]);
+/*
+ * Output row 3 (pOut + 3*BpL) of the 4x4 block written for each source pixel.
+ * PIXELrc_<op> stores to output row r and output column c (0, BPP, BPP2 or
+ * BPP3 bytes).  c[1]..c[9] is the 3x3 source neighbourhood with c[5] at the
+ * centre.  <op> selects the hq4x_InterpN blend and its operands; _0 copies
+ * c[5] unchanged.
+ * The _0 stores copy exactly sizeof(c[5]) bytes.  The earlier "*((int*)...) ="
+ * form always wrote 4 bytes, which for 16-bit pixels overwrote the next
+ * output pixel and, for the last pixel of the image, memory past the buffer.
+ */
+#define PIXEL30_0     memcpy(pOut+BpL+BpL+BpL, &c[5], sizeof(c[5]));
+#define PIXEL30_11    hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[8]);
+#define PIXEL30_12    hq4x_Interp1(pOut+BpL+BpL+BpL, c[5], c[4]);
+#define PIXEL30_20    hq4x_Interp2(pOut+BpL+BpL+BpL, c[5], c[8], c[4]);
+#define PIXEL30_50    hq4x_Interp5(pOut+BpL+BpL+BpL, c[8], c[4]);
+#define PIXEL30_80    hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[7]);
+#define PIXEL30_81    hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[8]);
+#define PIXEL30_82    hq4x_Interp8(pOut+BpL+BpL+BpL, c[5], c[4]);
+#define PIXEL31_0     memcpy(pOut+BpL+BpL+BpL+BPP, &c[5], sizeof(c[5]));
+#define PIXEL31_10    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[7]);
+#define PIXEL31_11    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
+#define PIXEL31_13    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
+#define PIXEL31_21    hq4x_Interp2(pOut+BpL+BpL+BpL+BPP, c[8], c[5], c[4]);
+#define PIXEL31_32    hq4x_Interp3(pOut+BpL+BpL+BpL+BPP, c[5], c[4]);
+#define PIXEL31_50    hq4x_Interp5(pOut+BpL+BpL+BpL+BPP, c[8], c[5]);
+#define PIXEL31_60    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[4]);
+#define PIXEL31_61    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP, c[5], c[8], c[7]);
+#define PIXEL31_81    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[5], c[8]);
+#define PIXEL31_83    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP, c[8], c[4]);
+#define PIXEL32_0     memcpy(pOut+BpL+BpL+BpL+BPP2, &c[5], sizeof(c[5]));
+#define PIXEL32_10    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[9]);
+#define PIXEL32_12    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
+#define PIXEL32_14    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
+#define PIXEL32_21    hq4x_Interp2(pOut+BpL+BpL+BpL+BPP2, c[8], c[5], c[6]);
+#define PIXEL32_31    hq4x_Interp3(pOut+BpL+BpL+BpL+BPP2, c[5], c[6]);
+#define PIXEL32_50    hq4x_Interp5(pOut+BpL+BpL+BpL+BPP2, c[8], c[5]);
+#define PIXEL32_60    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[6]);
+#define PIXEL32_61    hq4x_Interp6(pOut+BpL+BpL+BpL+BPP2, c[5], c[8], c[9]);
+#define PIXEL32_82    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[5], c[8]);
+#define PIXEL32_83    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP2, c[8], c[6]);
+#define PIXEL33_0     memcpy(pOut+BpL+BpL+BpL+BPP3, &c[5], sizeof(c[5]));
+#define PIXEL33_11    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL33_12    hq4x_Interp1(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
+#define PIXEL33_20    hq4x_Interp2(pOut+BpL+BpL+BpL+BPP3, c[5], c[8], c[6]);
+#define PIXEL33_50    hq4x_Interp5(pOut+BpL+BpL+BpL+BPP3, c[8], c[6]);
+#define PIXEL33_80    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[9]);
+#define PIXEL33_81    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[6]);
+#define PIXEL33_82    hq4x_Interp8(pOut+BpL+BpL+BpL+BPP3, c[5], c[8]);
+
+/*
+ * Generates Diff_<n>(w1, w2) for pixels of width <b> bits.
+ * Both pixels are converted with RGB<n>toYUV; the result is 1 when the Y, U
+ * or V lane differs by more than trY, trU or trV respectively, otherwise 0.
+ * The lanes are tested in Y, U, V order and the first hit decides.
+ */
+#define HQ4X_DIFF(n, b) \
+static int Diff_##n (uint##b w1, uint##b w2) \
+{ \
+  const int yuvA = RGB##n##toYUV(w1); \
+  const int yuvB = RGB##n##toYUV(w2); \
+  if ( abs((yuvA & Ymask) - (yuvB & Ymask)) > trY ) return 1; \
+  if ( abs((yuvA & Umask) - (yuvB & Umask)) > trU ) return 1; \
+  return ( abs((yuvA & Vmask) - (yuvB & Vmask)) > trV ); \
+}
+
+/* 32-bit variant: Diff_888, built on RGB888toYUV. */
+HQ4X_DIFF(888, 32)
+
+#if !_16BPP_HACK
+/* 16-bit variants: Diff_444, Diff_555 and Diff_565, each built on the
+ * RGB<n>toYUV converter of the same suffix. */
+HQ4X_DIFF(444, 16)
+HQ4X_DIFF(555, 16)
+HQ4X_DIFF(565, 16)
+
+/*
+ * hq4x pass over an ARGB4444 image.
+ *
+ * For every source pixel the 3x3 neighbourhood is gathered, an 8-bit pattern
+ * of "neighbour differs from centre" flags is built, and the included
+ * TextureFilters_hq4x.h body is run with that pattern.
+ * NOTE(review): the header is not visible here; presumably it switches on
+ * `pattern` and writes the 4x4 output block through the PIXELrc_* macros.
+ *
+ *   pIn    - source pixels, read as uint16
+ *   pOut   - destination; advanced 8 bytes per source pixel and by BpL
+ *            between output rows
+ *   Xres   - number of source pixels processed per row
+ *   Yres   - number of source rows processed
+ *   SrcPPL - source pixels per line (row stride is SrcPPL * 2 bytes)
+ *   BpL    - bytes per destination line
+ */
+void hq4x_4444(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{
+/* Bind the generic names used by the PIXELrc_* macros and the included header
+ * to the 4444 helpers for the duration of this function. */
+#define hq4x_Interp1 hq4x_Interp1_4444
+#define hq4x_Interp2 hq4x_Interp2_4444
+#define hq4x_Interp3 hq4x_Interp3_4444
+/* NOTE(review): no HQ4X_INTERP4 generator is visible in this file chunk, so
+ * hq4x_Interp4_4444 looks undefined; harmless only while nothing uses it. */
+#define hq4x_Interp4 hq4x_Interp4_4444
+#define hq4x_Interp5 hq4x_Interp5_4444
+#define hq4x_Interp6 hq4x_Interp6_4444
+#define hq4x_Interp7 hq4x_Interp7_4444
+#define hq4x_Interp8 hq4x_Interp8_4444
+#define Diff Diff_444
+/* Byte offsets of output columns 1, 2 and 3 for 2-byte pixels. */
+#define BPP   2
+#define BPP2  4
+#define BPP3  6
+
+  int  i, j, k;
+  int  prevline, nextline;
+  uint16  w[10];
+  uint16  c[10];
+
+  int pattern;
+  int flag;
+
+  int YUV1, YUV2;
+
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w1 | w2 | w3 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w4 | w5 | w6 |
+  //   +----+----+----+
+  //   |    |    |    |
+  //   | w7 | w8 | w9 |
+  //   +----+----+----+
+
+  for (j = 0; j < Yres; j++) {
+    // Byte offsets to the rows above/below; 0 on the first/last row, so the
+    // current row is reused at the image border.
+    if (j>0)      prevline = -SrcPPL*2; else prevline = 0;
+    if (j<Yres-1) nextline =  SrcPPL*2; else nextline = 0;
+
+    for (i=0; i<Xres; i++) {
+      // Centre column of the neighbourhood.
+      w[2] = *((uint16*)(pIn + prevline));
+      w[5] = *((uint16*)pIn);
+      w[8] = *((uint16*)(pIn + nextline));
+
+      // Left column; the centre column is reused at the left edge.
+      if (i>0) {
+        w[1] = *((uint16*)(pIn + prevline - 2));
+        w[4] = *((uint16*)(pIn - 2));
+        w[7] = *((uint16*)(pIn + nextline - 2));
+      } else {
+        w[1] = w[2];
+        w[4] = w[5];
+        w[7] = w[8];
+      }
+
+      // Right column; the centre column is reused at the right edge.
+      if (i<Xres-1) {
+        w[3] = *((uint16*)(pIn + prevline + 2));
+        w[6] = *((uint16*)(pIn + 2));
+        w[9] = *((uint16*)(pIn + nextline + 2));
+      }   else {
+        w[3] = w[2];
+        w[6] = w[5];
+        w[9] = w[8];
+      }
+
+      // Build the pattern: one bit per neighbour in the order
+      // w1,w2,w3,w4,w6,w7,w8,w9 (bit 0 = w1), set when the neighbour differs
+      // from the centre by more than the Y/U/V thresholds.
+      pattern = 0;
+      flag = 1;
+
+      YUV1 = RGB444toYUV(w[5]);
+
+      for (k=1; k<=9; k++) {
+        if (k==5) continue;
+
+        // Identical raw pixels can never differ, so skip the table lookup.
+        if ( w[k] != w[5] ) {
+          YUV2 = RGB444toYUV(w[k]);
+          if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+               ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+               ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+            pattern |= flag;
+        }
+        flag <<= 1;
+      }
+
+      // c[] is the copy of the neighbourhood that the PIXELrc_* macros read.
+      for (k=1; k<=9; k++)
+        c[k] = w[k];
+
+#include "TextureFilters_hq4x.h"
+
+      // One source pixel in (2 bytes), four output pixels across (8 bytes).
+      pIn+=2;
+      pOut+=8;
+    }
+    // Skip the unprocessed tail of the source row and the matching output,
+    // then step over the three further output rows of this band.
+    // NOTE(review): this lands on the start of the next band only when
+    // BpL == 8*SrcPPL - confirm against the caller.
+    pIn += 2*(SrcPPL-Xres);
+    pOut+= 8*(SrcPPL-Xres);
+    pOut+=BpL;
+    pOut+=BpL;
+    pOut+=BpL;
+  }
+
+#undef BPP
+#undef BPP2
+#undef BPP3
+#undef Diff
+#undef hq4x_Interp1
+#undef hq4x_Interp2
+#undef hq4x_Interp3
+#undef hq4x_Interp4
+#undef hq4x_Interp5
+#undef hq4x_Interp6
+#undef hq4x_Interp7
+#undef hq4x_Interp8
+}
+
+void hq4x_1555(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{
+  /* Walks Xres x Yres 16-bit source pixels starting at pIn.  For each pixel
+   * the 3x3 neighbourhood is loaded into w[1..9], an 8-bit `pattern` marks
+   * the neighbours that differ from the centre w[5] by more than the
+   * trY/trU/trV thresholds, and the switch in TextureFilters_hq4x.h runs on
+   * that pattern.  The included text names w and pattern directly and its
+   * PIXELnn_mm macros live elsewhere (presumably using pOut, BpL and c), so
+   * the local identifiers below must keep their names.
+   *
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w1 | w2 | w3 |
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w4 | w5 | w6 |
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w7 | w8 | w9 |
+   *   +----+----+----+
+   */
+
+  /* Generic names used by the included switch, bound to the 1555 variants. */
+#define BPP   2
+#define BPP2  4
+#define BPP3  6
+#define Diff Diff_555
+#define hq4x_Interp1 hq4x_Interp1_1555
+#define hq4x_Interp2 hq4x_Interp2_1555
+#define hq4x_Interp3 hq4x_Interp3_1555
+#define hq4x_Interp4 hq4x_Interp4_1555
+#define hq4x_Interp5 hq4x_Interp5_1555
+#define hq4x_Interp6 hq4x_Interp6_1555
+#define hq4x_Interp7 hq4x_Interp7_1555
+#define hq4x_Interp8 hq4x_Interp8_1555
+
+  int  i, j, k;
+  int  prevline, nextline;  /* byte offsets from pIn to the rows above/below */
+  int  pattern;
+  int  flag;
+  int  YUV1, YUV2;
+  uint16  w[10];            /* neighbourhood, slot 0 unused */
+  uint16  c[10];            /* receives a copy of w[1..9] */
+
+  for (j = 0; j < Yres; j++) {
+    /* On the first/last row the missing neighbour row is the row itself. */
+    prevline = (j > 0)      ? -SrcPPL*BPP : 0;
+    nextline = (j < Yres-1) ?  SrcPPL*BPP : 0;
+
+    for (i = 0; i < Xres; i++) {
+      /* Middle column. */
+      w[2] = *(uint16*)(pIn + prevline);
+      w[5] = *(uint16*)pIn;
+      w[8] = *(uint16*)(pIn + nextline);
+
+      /* Left column; at i == 0 it repeats the middle column. */
+      w[1] = (i > 0) ? *(uint16*)(pIn + prevline - BPP) : w[2];
+      w[4] = (i > 0) ? *(uint16*)(pIn - BPP)            : w[5];
+      w[7] = (i > 0) ? *(uint16*)(pIn + nextline - BPP) : w[8];
+
+      /* Right column; at i == Xres-1 it repeats the middle column. */
+      w[3] = (i < Xres-1) ? *(uint16*)(pIn + prevline + BPP) : w[2];
+      w[6] = (i < Xres-1) ? *(uint16*)(pIn + BPP)            : w[5];
+      w[9] = (i < Xres-1) ? *(uint16*)(pIn + nextline + BPP) : w[8];
+
+      /* Bit 0 stands for w[1], then one bit per neighbour up to w[9] in
+       * bit 7 (w[5] takes no bit).  c[] is filled in the same pass. */
+      YUV1 = RGB555toYUV(w[5]);
+      pattern = 0;
+      flag = 1;
+      for (k = 1; k <= 9; k++) {
+        c[k] = w[k];
+        if (k != 5) {
+          if ( w[k] != w[5] ) {
+            YUV2 = RGB555toYUV(w[k]);
+            if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+                 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+                 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+              pattern |= flag;
+          }
+          flag <<= 1;
+        }
+      }
+
+      /* The loop body continues with the textually included switch. */
+#include "TextureFilters_hq4x.h"
+
+      pIn  += BPP;
+      pOut += 4*BPP;
+    }
+
+    /* Step over the SrcPPL-Xres remainder, then add BpL three more times. */
+    pIn  += BPP*(SrcPPL-Xres);
+    pOut += 4*BPP*(SrcPPL-Xres);
+    pOut += 3*BpL;
+  }
+
+  /* Drop the per-format bindings again. */
+#undef hq4x_Interp8
+#undef hq4x_Interp7
+#undef hq4x_Interp6
+#undef hq4x_Interp5
+#undef hq4x_Interp4
+#undef hq4x_Interp3
+#undef hq4x_Interp2
+#undef hq4x_Interp1
+#undef Diff
+#undef BPP3
+#undef BPP2
+#undef BPP
+}
+
+void hq4x_565(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{
+  /* Walks Xres x Yres 16-bit source pixels starting at pIn.  For each pixel
+   * the 3x3 neighbourhood is loaded into w[1..9], an 8-bit `pattern` marks
+   * the neighbours that differ from the centre w[5] by more than the
+   * trY/trU/trV thresholds, and the switch in TextureFilters_hq4x.h runs on
+   * that pattern.  The included text names w and pattern directly and its
+   * PIXELnn_mm macros live elsewhere (presumably using pOut, BpL and c), so
+   * the local identifiers below must keep their names.
+   *
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w1 | w2 | w3 |
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w4 | w5 | w6 |
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w7 | w8 | w9 |
+   *   +----+----+----+
+   */
+
+  /* Generic names used by the included switch, bound to the 565 variants. */
+#define BPP   2
+#define BPP2  4
+#define BPP3  6
+#define Diff Diff_565
+#define hq4x_Interp1 hq4x_Interp1_565
+#define hq4x_Interp2 hq4x_Interp2_565
+#define hq4x_Interp3 hq4x_Interp3_565
+#define hq4x_Interp4 hq4x_Interp4_565
+#define hq4x_Interp5 hq4x_Interp5_565
+#define hq4x_Interp6 hq4x_Interp6_565
+#define hq4x_Interp7 hq4x_Interp7_565
+#define hq4x_Interp8 hq4x_Interp8_565
+
+  int  i, j, k;
+  int  prevline, nextline;  /* byte offsets from pIn to the rows above/below */
+  int  pattern;
+  int  flag;
+  int  YUV1, YUV2;
+  uint16  w[10];            /* neighbourhood, slot 0 unused */
+  uint16  c[10];            /* receives a copy of w[1..9] */
+
+  for (j = 0; j < Yres; j++) {
+    /* On the first/last row the missing neighbour row is the row itself. */
+    prevline = (j > 0)      ? -SrcPPL*BPP : 0;
+    nextline = (j < Yres-1) ?  SrcPPL*BPP : 0;
+
+    for (i = 0; i < Xres; i++) {
+      /* Middle column. */
+      w[2] = *(uint16*)(pIn + prevline);
+      w[5] = *(uint16*)pIn;
+      w[8] = *(uint16*)(pIn + nextline);
+
+      /* Left column; at i == 0 it repeats the middle column. */
+      w[1] = (i > 0) ? *(uint16*)(pIn + prevline - BPP) : w[2];
+      w[4] = (i > 0) ? *(uint16*)(pIn - BPP)            : w[5];
+      w[7] = (i > 0) ? *(uint16*)(pIn + nextline - BPP) : w[8];
+
+      /* Right column; at i == Xres-1 it repeats the middle column. */
+      w[3] = (i < Xres-1) ? *(uint16*)(pIn + prevline + BPP) : w[2];
+      w[6] = (i < Xres-1) ? *(uint16*)(pIn + BPP)            : w[5];
+      w[9] = (i < Xres-1) ? *(uint16*)(pIn + nextline + BPP) : w[8];
+
+      /* Bit 0 stands for w[1], then one bit per neighbour up to w[9] in
+       * bit 7 (w[5] takes no bit).  c[] is filled in the same pass. */
+      YUV1 = RGB565toYUV(w[5]);
+      pattern = 0;
+      flag = 1;
+      for (k = 1; k <= 9; k++) {
+        c[k] = w[k];
+        if (k != 5) {
+          if ( w[k] != w[5] ) {
+            YUV2 = RGB565toYUV(w[k]);
+            if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+                 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+                 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+              pattern |= flag;
+          }
+          flag <<= 1;
+        }
+      }
+
+      /* The loop body continues with the textually included switch. */
+#include "TextureFilters_hq4x.h"
+
+      pIn  += BPP;
+      pOut += 4*BPP;
+    }
+
+    /* Step over the SrcPPL-Xres remainder, then add BpL three more times. */
+    pIn  += BPP*(SrcPPL-Xres);
+    pOut += 4*BPP*(SrcPPL-Xres);
+    pOut += 3*BpL;
+  }
+
+  /* Drop the per-format bindings again. */
+#undef hq4x_Interp8
+#undef hq4x_Interp7
+#undef hq4x_Interp6
+#undef hq4x_Interp5
+#undef hq4x_Interp4
+#undef hq4x_Interp3
+#undef hq4x_Interp2
+#undef hq4x_Interp1
+#undef Diff
+#undef BPP3
+#undef BPP2
+#undef BPP
+}
+#endif /* !_16BPP_HACK */
+
+void hq4x_8888(unsigned char * pIn, unsigned char * pOut, int Xres, int Yres, int SrcPPL, int BpL)
+{
+  /* Walks Xres x Yres 32-bit source pixels starting at pIn.  For each pixel
+   * the 3x3 neighbourhood is loaded into w[1..9], an 8-bit `pattern` marks
+   * the neighbours that differ from the centre w[5] by more than the
+   * trY/trU/trV thresholds, and the switch in TextureFilters_hq4x.h runs on
+   * that pattern.  The included text names w and pattern directly and its
+   * PIXELnn_mm macros live elsewhere (presumably using pOut, BpL and c), so
+   * the local identifiers below must keep their names.
+   *
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w1 | w2 | w3 |
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w4 | w5 | w6 |
+   *   +----+----+----+
+   *   |    |    |    |
+   *   | w7 | w8 | w9 |
+   *   +----+----+----+
+   */
+
+  /* Generic names used by the included switch, bound to the 8888 variants. */
+#define BPP  4
+#define BPP2 8
+#define BPP3 12
+#define Diff Diff_888
+#define hq4x_Interp1 hq4x_Interp1_8888
+#define hq4x_Interp2 hq4x_Interp2_8888
+#define hq4x_Interp3 hq4x_Interp3_8888
+#define hq4x_Interp4 hq4x_Interp4_8888
+#define hq4x_Interp5 hq4x_Interp5_8888
+#define hq4x_Interp6 hq4x_Interp6_8888
+#define hq4x_Interp7 hq4x_Interp7_8888
+#define hq4x_Interp8 hq4x_Interp8_8888
+
+  int  i, j, k;
+  int  prevline, nextline;  /* byte offsets from pIn to the rows above/below */
+  int  pattern;
+  int  flag;
+  int  YUV1, YUV2;
+  uint32  w[10];            /* neighbourhood, slot 0 unused */
+  uint32  c[10];            /* receives a copy of w[1..9] */
+
+  for (j = 0; j < Yres; j++) {
+    /* On the first/last row the missing neighbour row is the row itself. */
+    prevline = (j > 0)      ? -SrcPPL*BPP : 0;
+    nextline = (j < Yres-1) ?  SrcPPL*BPP : 0;
+
+    for (i = 0; i < Xres; i++) {
+      /* Middle column. */
+      w[2] = *(uint32*)(pIn + prevline);
+      w[5] = *(uint32*)pIn;
+      w[8] = *(uint32*)(pIn + nextline);
+
+      /* Left column; at i == 0 it repeats the middle column. */
+      w[1] = (i > 0) ? *(uint32*)(pIn + prevline - BPP) : w[2];
+      w[4] = (i > 0) ? *(uint32*)(pIn - BPP)            : w[5];
+      w[7] = (i > 0) ? *(uint32*)(pIn + nextline - BPP) : w[8];
+
+      /* Right column; at i == Xres-1 it repeats the middle column. */
+      w[3] = (i < Xres-1) ? *(uint32*)(pIn + prevline + BPP) : w[2];
+      w[6] = (i < Xres-1) ? *(uint32*)(pIn + BPP)            : w[5];
+      w[9] = (i < Xres-1) ? *(uint32*)(pIn + nextline + BPP) : w[8];
+
+      /* Bit 0 stands for w[1], then one bit per neighbour up to w[9] in
+       * bit 7 (w[5] takes no bit).  c[] is filled in the same pass. */
+      YUV1 = RGB888toYUV(w[5]);
+      pattern = 0;
+      flag = 1;
+      for (k = 1; k <= 9; k++) {
+        c[k] = w[k];
+        if (k != 5) {
+          if ( w[k] != w[5] ) {
+            YUV2 = RGB888toYUV(w[k]);
+            if ( ( abs((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
+                 ( abs((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
+                 ( abs((YUV1 & Vmask) - (YUV2 & Vmask)) > trV ) )
+              pattern |= flag;
+          }
+          flag <<= 1;
+        }
+      }
+
+      /* The loop body continues with the textually included switch. */
+#include "TextureFilters_hq4x.h"
+
+      pIn  += BPP;
+      pOut += 4*BPP;
+    }
+
+    /* Step over the SrcPPL-Xres remainder on both pointers, then add BpL
+     * three more times to the output pointer. */
+    pIn  += BPP*(SrcPPL-Xres);
+    pOut += 4*BPP*(SrcPPL-Xres);
+    pOut += 3*BpL;
+  }
+
+  /* Drop the per-format bindings again. */
+#undef hq4x_Interp8
+#undef hq4x_Interp7
+#undef hq4x_Interp6
+#undef hq4x_Interp5
+#undef hq4x_Interp4
+#undef hq4x_Interp3
+#undef hq4x_Interp2
+#undef hq4x_Interp1
+#undef Diff
+#undef BPP3
+#undef BPP2
+#undef BPP
+}
+
+#if !_16BPP_HACK
+void hq4x_init(void)
+{
+  /* One-time fill of the RGB444toYUV lookup table: for every 12-bit index
+   * (red nibble << 8 | green nibble << 4 | blue nibble) store
+   * (Y << 16) | (u << 8) | v.  A static flag makes repeat calls no-ops. */
+  static int done = 0;
+  int idx;
+  int red, green, blue;
+  int yVal, uVal, vVal;
+
+  if (!done) {
+    for (idx = 0; idx < 4096; idx++) {
+      /* Widen each 4-bit channel to 8 bits by repeating the nibble
+       * (n * 0x11 == (n << 4) | n for n in 0..15). */
+      red   = ((idx >> 8) & 0xF) * 0x11;
+      green = ((idx >> 4) & 0xF) * 0x11;
+      blue  = ( idx       & 0xF) * 0x11;
+
+      /* Shift-only approximation, used in place of the Microsoft
+       * RGB888->YUV formula (66/129/25, -38/-74/112, 112/-94/-18 weights). */
+      yVal = (red + green + blue) >> 2;
+      uVal = 128 + ((red - blue) >> 2);
+      vVal = 128 + ((-red + 2*green - blue) >> 3);
+
+      RGB444toYUV[idx] = (yVal << 16) | (uVal << 8) | vVal;
+    }
+
+    done = 1;
+  }
+}
+#endif /* !_16BPP_HACK */
diff --git a/GLideNHQ/TextureFilters_hq4x.h b/GLideNHQ/TextureFilters_hq4x.h
new file mode 100644
index 00000000..a3a27403
--- /dev/null
+++ b/GLideNHQ/TextureFilters_hq4x.h
@@ -0,0 +1,4999 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*  Based on Maxim Stepin and Rice1964 hq4x code */
+
+      switch (pattern)
+      {
+        case 0:
+        case 1:
+        case 4:
+        case 32:
+        case 128:
+        case 5:
+        case 132:
+        case 160:
+        case 33:
+        case 129:
+        case 36:
+        case 133:
+        case 164:
+        case 161:
+        case 37:
+        case 165:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 2:
+        case 34:
+        case 130:
+        case 162:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 16:
+        case 17:
+        case 48:
+        case 49:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 64:
+        case 65:
+        case 68:
+        case 69:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 8:
+        case 12:
+        case 136:
+        case 140:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 3:
+        case 35:
+        case 131:
+        case 163:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 6:
+        case 38:
+        case 134:
+        case 166:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 20:
+        case 21:
+        case 52:
+        case 53:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 144:
+        case 145:
+        case 176:
+        case 177:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 192:
+        case 193:
+        case 196:
+        case 197:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 96:
+        case 97:
+        case 100:
+        case 101:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 40:
+        case 44:
+        case 168:
+        case 172:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 9:
+        case 13:
+        case 137:
+        case 141:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 18:
+        case 50:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL12_0
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 80:
+        case 81:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 72:
+        case 76:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL21_0
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 10:
+        case 138:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+            PIXEL11_0
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 66:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 24:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 7:
+        case 39:
+        case 135:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 148:
+        case 149:
+        case 180:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 224:
+        case 228:
+        case 225:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 41:
+        case 169:
+        case 45:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 22:
+        case 54:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 208:
+        case 209:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 104:
+        case 108:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 11:
+        case 139:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 19:
+        case 51:
+        {
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL00_81
+            PIXEL01_31
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL00_12
+            PIXEL01_14
+            PIXEL02_83
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_21
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 146:
+        case 178:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+            PIXEL23_32
+            PIXEL33_82
+          }
+          else
+          {
+            PIXEL02_21
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_83
+            PIXEL23_13
+            PIXEL33_11
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          break;
+        }
+        case 84:
+        case 85:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL03_81
+            PIXEL13_31
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL03_12
+            PIXEL13_14
+            PIXEL22_70
+            PIXEL23_83
+            PIXEL32_21
+            PIXEL33_50
+          }
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 112:
+        case 113:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL30_82
+            PIXEL31_32
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_70
+            PIXEL23_21
+            PIXEL30_11
+            PIXEL31_13
+            PIXEL32_83
+            PIXEL33_50
+          }
+          break;
+        }
+        case 200:
+        case 204:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+            PIXEL32_31
+            PIXEL33_81
+          }
+          else
+          {
+            PIXEL20_21
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_83
+            PIXEL32_14
+            PIXEL33_12
+          }
+          PIXEL22_31
+          PIXEL23_81
+          break;
+        }
+        case 73:
+        case 77:
+        {
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL00_82
+            PIXEL10_32
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL00_11
+            PIXEL10_13
+            PIXEL20_83
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_21
+          }
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 42:
+        case 170:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+            PIXEL20_31
+            PIXEL30_81
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_21
+            PIXEL10_83
+            PIXEL11_70
+            PIXEL20_14
+            PIXEL30_12
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 14:
+        case 142:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL02_32
+            PIXEL03_82
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_83
+            PIXEL02_13
+            PIXEL03_11
+            PIXEL10_21
+            PIXEL11_70
+          }
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 67:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 70:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 28:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 152:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 194:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 98:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 56:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 25:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 26:
+        case 31:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 82:
+        case 214:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 88:
+        case 248:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 74:
+        case 107:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 27:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 86:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 216:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 106:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 30:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 210:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 120:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 75:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 29:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 198:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 184:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 99:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 57:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 71:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 156:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 226:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 60:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 195:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 102:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 153:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 58:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 83:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_61
+          PIXEL21_30
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 92:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 202:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 78:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 154:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 114:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 89:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 90:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 55:
+        case 23:
+        {
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL00_81
+            PIXEL01_31
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL00_12
+            PIXEL01_14
+            PIXEL02_83
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_21
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 182:
+        case 150:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+            PIXEL23_32
+            PIXEL33_82
+          }
+          else
+          {
+            PIXEL02_21
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_83
+            PIXEL23_13
+            PIXEL33_11
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          break;
+        }
+        case 213:
+        case 212:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL03_81
+            PIXEL13_31
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL03_12
+            PIXEL13_14
+            PIXEL22_70
+            PIXEL23_83
+            PIXEL32_21
+            PIXEL33_50
+          }
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 241:
+        case 240:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL30_82
+            PIXEL31_32
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL22_70
+            PIXEL23_21
+            PIXEL30_11
+            PIXEL31_13
+            PIXEL32_83
+            PIXEL33_50
+          }
+          break;
+        }
+        case 236:
+        case 232:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+            PIXEL32_31
+            PIXEL33_81
+          }
+          else
+          {
+            PIXEL20_21
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_83
+            PIXEL32_14
+            PIXEL33_12
+          }
+          PIXEL22_31
+          PIXEL23_81
+          break;
+        }
+        case 109:
+        case 105:
+        {
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL00_82
+            PIXEL10_32
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL00_11
+            PIXEL10_13
+            PIXEL20_83
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_21
+          }
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 171:
+        case 43:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+            PIXEL11_0
+            PIXEL20_31
+            PIXEL30_81
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_21
+            PIXEL10_83
+            PIXEL11_70
+            PIXEL20_14
+            PIXEL30_12
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 143:
+        case 15:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL02_32
+            PIXEL03_82
+            PIXEL10_0
+            PIXEL11_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_83
+            PIXEL02_13
+            PIXEL03_11
+            PIXEL10_21
+            PIXEL11_70
+          }
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 124:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 203:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 62:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 211:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 118:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 217:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 110:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 155:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 188:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 185:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 61:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 157:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 103:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 227:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 230:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 199:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 220:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 158:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 234:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_61
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 242:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 59:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL11_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 121:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 87:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL20_61
+          PIXEL21_30
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 79:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 122:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 94:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 218:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 91:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL11_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 229:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 167:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 173:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 181:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 186:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 115:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 93:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          break;
+        }
+        case 206:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 205:
+        case 201:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_10
+            PIXEL21_30
+            PIXEL30_80
+            PIXEL31_10
+          }
+          else
+          {
+            PIXEL20_12
+            PIXEL21_0
+            PIXEL30_20
+            PIXEL31_11
+          }
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 174:
+        case 46:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_80
+            PIXEL01_10
+            PIXEL10_10
+            PIXEL11_30
+          }
+          else
+          {
+            PIXEL00_20
+            PIXEL01_12
+            PIXEL10_11
+            PIXEL11_0
+          }
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 179:
+        case 147:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_10
+            PIXEL03_80
+            PIXEL12_30
+            PIXEL13_10
+          }
+          else
+          {
+            PIXEL02_11
+            PIXEL03_20
+            PIXEL12_0
+            PIXEL13_12
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 117:
+        case 116:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_30
+            PIXEL23_10
+            PIXEL32_10
+            PIXEL33_80
+          }
+          else
+          {
+            PIXEL22_0
+            PIXEL23_11
+            PIXEL32_12
+            PIXEL33_20
+          }
+          PIXEL30_82
+          PIXEL31_32
+          break;
+        }
+        case 189:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 231:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 126:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 219:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 125:
+        {
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL00_82
+            PIXEL10_32
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL00_11
+            PIXEL10_13
+            PIXEL20_83
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_21
+          }
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 221:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL03_81
+            PIXEL13_31
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL03_12
+            PIXEL13_14
+            PIXEL22_70
+            PIXEL23_83
+            PIXEL32_21
+            PIXEL33_50
+          }
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 207:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL02_32
+            PIXEL03_82
+            PIXEL10_0
+            PIXEL11_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_83
+            PIXEL02_13
+            PIXEL03_11
+            PIXEL10_21
+            PIXEL11_70
+          }
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_31
+          PIXEL23_81
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 238:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL21_0
+            PIXEL30_0
+            PIXEL31_0
+            PIXEL32_31
+            PIXEL33_81
+          }
+          else
+          {
+            PIXEL20_21
+            PIXEL21_70
+            PIXEL30_50
+            PIXEL31_83
+            PIXEL32_14
+            PIXEL33_12
+          }
+          PIXEL22_31
+          PIXEL23_81
+          break;
+        }
+        case 190:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+            PIXEL23_32
+            PIXEL33_82
+          }
+          else
+          {
+            PIXEL02_21
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_83
+            PIXEL23_13
+            PIXEL33_11
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          break;
+        }
+        case 187:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+            PIXEL11_0
+            PIXEL20_31
+            PIXEL30_81
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_21
+            PIXEL10_83
+            PIXEL11_70
+            PIXEL20_14
+            PIXEL30_12
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 243:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_82
+          PIXEL21_32
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL22_0
+            PIXEL23_0
+            PIXEL30_82
+            PIXEL31_32
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL22_70
+            PIXEL23_21
+            PIXEL30_11
+            PIXEL31_13
+            PIXEL32_83
+            PIXEL33_50
+          }
+          break;
+        }
+        case 119:
+        {
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL00_81
+            PIXEL01_31
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL12_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL00_12
+            PIXEL01_14
+            PIXEL02_83
+            PIXEL03_50
+            PIXEL12_70
+            PIXEL13_21
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 237:
+        case 233:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_60
+          PIXEL03_20
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_70
+          PIXEL13_60
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 175:
+        case 47:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_70
+          PIXEL23_60
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_60
+          PIXEL33_20
+          break;
+        }
+        case 183:
+        case 151:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_60
+          PIXEL21_70
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_20
+          PIXEL31_60
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 245:
+        case 244:
+        {
+          PIXEL00_20
+          PIXEL01_60
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_60
+          PIXEL11_70
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 250:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          break;
+        }
+        case 123:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 95:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_80
+          PIXEL31_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 222:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 252:
+        {
+          PIXEL00_80
+          PIXEL01_61
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_31
+          PIXEL13_31
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 249:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_61
+          PIXEL03_80
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          break;
+        }
+        case 235:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_61
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 111:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_61
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 63:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_61
+          PIXEL33_80
+          break;
+        }
+        case 159:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_80
+          PIXEL31_61
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 215:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_61
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 246:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_61
+          PIXEL11_30
+          PIXEL12_0
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 254:
+        {
+          PIXEL00_80
+          PIXEL01_10
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_10
+          PIXEL11_30
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 253:
+        {
+          PIXEL00_82
+          PIXEL01_82
+          PIXEL02_81
+          PIXEL03_81
+          PIXEL10_32
+          PIXEL11_32
+          PIXEL12_31
+          PIXEL13_31
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 251:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_10
+          PIXEL03_80
+          PIXEL11_0
+          PIXEL12_30
+          PIXEL13_10
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          break;
+        }
+        case 239:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_32
+          PIXEL03_82
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_32
+          PIXEL13_82
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_31
+          PIXEL23_81
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_31
+          PIXEL33_81
+          break;
+        }
+        case 127:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL02_0
+            PIXEL03_0
+            PIXEL13_0
+          }
+          else
+          {
+            PIXEL02_50
+            PIXEL03_50
+            PIXEL13_50
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL20_0
+            PIXEL30_0
+            PIXEL31_0
+          }
+          else
+          {
+            PIXEL20_50
+            PIXEL30_50
+            PIXEL31_50
+          }
+          PIXEL21_0
+          PIXEL22_30
+          PIXEL23_10
+          PIXEL32_10
+          PIXEL33_80
+          break;
+        }
+        case 191:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_31
+          PIXEL21_31
+          PIXEL22_32
+          PIXEL23_32
+          PIXEL30_81
+          PIXEL31_81
+          PIXEL32_82
+          PIXEL33_82
+          break;
+        }
+        case 223:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+            PIXEL01_0
+            PIXEL10_0
+          }
+          else
+          {
+            PIXEL00_50
+            PIXEL01_50
+            PIXEL10_50
+          }
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_10
+          PIXEL21_30
+          PIXEL22_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL23_0
+            PIXEL32_0
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL23_50
+            PIXEL32_50
+            PIXEL33_50
+          }
+          PIXEL30_80
+          PIXEL31_10
+          break;
+        }
+        case 247:
+        {
+          PIXEL00_81
+          PIXEL01_31
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_81
+          PIXEL11_31
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_82
+          PIXEL21_32
+          PIXEL22_0
+          PIXEL23_0
+          PIXEL30_82
+          PIXEL31_32
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+        case 255:
+        {
+          if (Diff(w[4], w[2]))
+          {
+            PIXEL00_0
+          }
+          else
+          {
+            PIXEL00_20
+          }
+          PIXEL01_0
+          PIXEL02_0
+          if (Diff(w[2], w[6]))
+          {
+            PIXEL03_0
+          }
+          else
+          {
+            PIXEL03_20
+          }
+          PIXEL10_0
+          PIXEL11_0
+          PIXEL12_0
+          PIXEL13_0
+          PIXEL20_0
+          PIXEL21_0
+          PIXEL22_0
+          PIXEL23_0
+          if (Diff(w[8], w[4]))
+          {
+            PIXEL30_0
+          }
+          else
+          {
+            PIXEL30_20
+          }
+          PIXEL31_0
+          PIXEL32_0
+          if (Diff(w[6], w[8]))
+          {
+            PIXEL33_0
+          }
+          else
+          {
+            PIXEL33_20
+          }
+          break;
+        }
+      }
diff --git a/GLideNHQ/TextureFilters_lq2x.h b/GLideNHQ/TextureFilters_lq2x.h
new file mode 100644
index 00000000..b5318ab8
--- /dev/null
+++ b/GLideNHQ/TextureFilters_lq2x.h
@@ -0,0 +1,1307 @@
+/*
+Copyright (C) 2003 Rice1964
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+
+/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Modified for the Texture Filtering library
+ */
+
+case 0 : 
+case 2 : 
+case 4 : 
+case 6 : 
+case 8 : 
+case 12 : 
+case 16 : 
+case 20 : 
+case 24 : 
+case 28 : 
+case 32 : 
+case 34 : 
+case 36 : 
+case 38 : 
+case 40 : 
+case 44 : 
+case 48 : 
+case 52 : 
+case 56 : 
+case 60 : 
+case 64 : 
+case 66 : 
+case 68 : 
+case 70 : 
+case 96 : 
+case 98 : 
+case 100 : 
+case 102 : 
+case 128 : 
+case 130 : 
+case 132 : 
+case 134 : 
+case 136 : 
+case 140 : 
+case 144 : 
+case 148 : 
+case 152 : 
+case 156 : 
+case 160 : 
+case 162 : 
+case 164 : 
+case 166 : 
+case 168 : 
+case 172 : 
+case 176 : 
+case 180 : 
+case 184 : 
+case 188 : 
+case 192 : 
+case 194 : 
+case 196 : 
+case 198 : 
+case 224 : 
+case 226 : 
+case 228 : 
+case 230 : 
+{ /* no predicate tested: P0, P1, P2 and P3 are all assigned IC(0) */
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+} break;
+case 1 : 
+case 5 : 
+case 9 : 
+case 13 : 
+case 17 : 
+case 21 : 
+case 25 : 
+case 29 : 
+case 33 : 
+case 37 : 
+case 41 : 
+case 45 : 
+case 49 : 
+case 53 : 
+case 57 : 
+case 61 : 
+case 65 : 
+case 69 : 
+case 97 : 
+case 101 : 
+case 129 : 
+case 133 : 
+case 137 : 
+case 141 : 
+case 145 : 
+case 149 : 
+case 153 : 
+case 157 : 
+case 161 : 
+case 165 : 
+case 169 : 
+case 173 : 
+case 177 : 
+case 181 : 
+case 185 : 
+case 189 : 
+case 193 : 
+case 197 : 
+case 225 : 
+case 229 : 
+{ /* no predicate tested: P0, P1, P2 and P3 are all assigned IC(1) */
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  P3 = IC(1);
+} break;
+case 3 : 
+case 35 : 
+case 67 : 
+case 99 : 
+case 131 : 
+case 163 : 
+case 195 : 
+case 227 : 
+{ /* no predicate tested: P0, P1, P2 and P3 are all assigned IC(2) */
+  P0 = IC(2);
+  P1 = IC(2);
+  P2 = IC(2);
+  P3 = IC(2);
+} break;
+case 7 : 
+case 39 : 
+case 71 : 
+case 103 : 
+case 135 : 
+case 167 : 
+case 199 : 
+case 231 : 
+{ /* no predicate tested: P0, P1, P2 and P3 are all assigned IC(3) */
+  P0 = IC(3);
+  P1 = IC(3);
+  P2 = IC(3);
+  P3 = IC(3);
+} break;
+case 10 : 
+case 138 : 
+{ /* P1..P3 fixed at IC(0); P0 = HQ2X_MUL ? IC(0) : I211(0, 1, 3) */
+  P1 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I211(0, 1, 3);
+  }
+} break;
+case 11 : 
+case 27 : 
+case 75 : 
+case 139 : 
+case 155 : 
+case 203 : 
+{ /* P1..P3 fixed at IC(2); P0 = HQ2X_MUL ? IC(2) : I211(2, 1, 3) */
+  P1 = IC(2);
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 14 : 
+case 142 : 
+{ /* P2, P3 fixed at IC(0); HQ2X_MUL selects P0 and P1 as a pair: IC(0)/IC(0), else I332(1, 3, 0)/I31(0, 1) */
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+    P1 = IC(0);
+  } else {
+    P0 = I332(1, 3, 0);
+    P1 = I31(0, 1);
+  }
+} break;
+case 15 : 
+case 143 : 
+case 207 : 
+{ /* P2, P3 fixed at IC(4); HQ2X_MUL selects P0 and P1 as a pair: IC(4)/IC(4), else I332(1, 3, 4)/I31(4, 1) */
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+    P1 = IC(4);
+  } else {
+    P0 = I332(1, 3, 4);
+    P1 = I31(4, 1);
+  }
+} break;
+case 18 : 
+case 22 : 
+case 30 : 
+case 50 : 
+case 54 : 
+case 62 : 
+case 86 : 
+case 118 : 
+{ /* P0, P2, P3 fixed at IC(0); P1 = HQ2X_MUR ? IC(0) : I211(0, 1, 5) */
+  P0 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 19 : 
+case 51 : 
+{ /* P2, P3 fixed at IC(2); HQ2X_MUR selects P0 and P1 as a pair: IC(2)/IC(2), else I31(2, 1)/I332(1, 5, 2) */
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUR) {
+    P0 = IC(2);
+    P1 = IC(2);
+  } else {
+    P0 = I31(2, 1);
+    P1 = I332(1, 5, 2);
+  }
+} break;
+case 23 : 
+case 55 : 
+case 119 : 
+{ /* P2, P3 fixed at IC(3); HQ2X_MUR selects P0 and P1 as a pair: IC(3)/IC(3), else I31(3, 1)/I332(1, 5, 3) */
+  P2 = IC(3);
+  P3 = IC(3);
+  if (HQ2X_MUR) {
+    P0 = IC(3);
+    P1 = IC(3);
+  } else {
+    P0 = I31(3, 1);
+    P1 = I332(1, 5, 3);
+  }
+} break;
+case 26 : 
+{ /* P2, P3 fixed at IC(0); P0 tests HQ2X_MUL (else I211), P1 tests HQ2X_MUR (else I211) */
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I211(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 31 : 
+case 95 : 
+{ /* P2, P3 fixed at IC(4); P0 tests HQ2X_MUL (else I211), P1 tests HQ2X_MUR (else I211) */
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 42 : 
+case 170 : 
+{ /* P1, P3 fixed at IC(0); HQ2X_MUL selects P0 and P2 as a pair: IC(0)/IC(0), else I332(1, 3, 0)/I31(0, 3) */
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+    P2 = IC(0);
+  } else {
+    P0 = I332(1, 3, 0);
+    P2 = I31(0, 3);
+  }
+} break;
+case 43 : 
+case 171 : 
+case 187 : 
+{ /* P1, P3 fixed at IC(2); HQ2X_MUL selects P0 and P2 as a pair: IC(2)/IC(2), else I332(1, 3, 2)/I31(2, 3) */
+  P1 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+    P2 = IC(2);
+  } else {
+    P0 = I332(1, 3, 2);
+    P2 = I31(2, 3);
+  }
+} break;
+case 46 : 
+case 174 : 
+{ /* P1..P3 fixed at IC(0); P0 = HQ2X_MUL ? IC(0) : I611(0, 1, 3) */
+  P1 = IC(0);
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+} break;
+case 47 : 
+case 175 : 
+{ /* P1..P3 fixed at IC(4); P0 = HQ2X_MUL ? IC(4) : I1411(4, 1, 3) */
+  P1 = IC(4);
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 58 : 
+case 154 : 
+case 186 : 
+{ /* P2, P3 fixed at IC(0); P0 tests HQ2X_MUL (else I611), P1 tests HQ2X_MUR (else I611) */
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 59 : 
+{ /* P2, P3 fixed at IC(2); P0 tests HQ2X_MUL (else I211), P1 tests HQ2X_MUR (else I611) */
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 63 : 
+{ /* P2, P3 fixed at IC(4); P0 tests HQ2X_MUL (else I1411), P1 tests HQ2X_MUR (else I211) */
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 72 : 
+case 76 : 
+case 104 : 
+case 106 : 
+case 108 : 
+case 110 : 
+case 120 : 
+case 124 : 
+{ /* P0, P1, P3 fixed at IC(0); P2 = HQ2X_MDL ? IC(0) : I211(0, 3, 7) */
+  P0 = IC(0);
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+} break;
+case 73 : 
+case 77 : 
+case 105 : 
+case 109 : 
+case 125 : 
+{ /* P1, P3 fixed at IC(1); HQ2X_MDL selects P0 and P2 as a pair: IC(1)/IC(1), else I31(1, 3)/I332(3, 7, 1) */
+  P1 = IC(1);
+  P3 = IC(1);
+  if (HQ2X_MDL) {
+    P0 = IC(1);
+    P2 = IC(1);
+  } else {
+    P0 = I31(1, 3);
+    P2 = I332(3, 7, 1);
+  }
+} break;
+case 74 : 
+{ /* P1, P3 fixed at IC(0); P2 tests HQ2X_MDL (else I211), P0 tests HQ2X_MUL (else I211) */
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I211(0, 1, 3);
+  }
+} break;
+case 78 : 
+case 202 : 
+case 206 : 
+{ /* P1, P3 fixed at IC(0); P2 tests HQ2X_MDL (else I611), P0 tests HQ2X_MUL (else I611) */
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+} break;
+case 79 : 
+{ /* P1, P3 fixed at IC(4); P2 tests HQ2X_MDL (else I611), P0 tests HQ2X_MUL (else I211) */
+  P1 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I611(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+} break;
+case 80 : 
+case 208 : 
+case 210 : 
+case 216 : 
+{ /* P0..P2 fixed at IC(0); P3 = HQ2X_MDR ? IC(0) : I211(0, 5, 7) */
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+} break;
+case 81 : 
+case 209 : 
+case 217 : 
+{ /* P0..P2 fixed at IC(1); P3 = HQ2X_MDR ? IC(1) : I211(1, 5, 7) */
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I211(1, 5, 7);
+  }
+} break;
+case 82 : 
+case 214 : 
+case 222 : 
+{ /* P0, P2 fixed at IC(0); P3 tests HQ2X_MDR (else I211), P1 tests HQ2X_MUR (else I211) */
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 83 : 
+case 115 : 
+{ /* P0, P2 fixed at IC(2); P3 tests HQ2X_MDR (else I611), P1 tests HQ2X_MUR (else I611) */
+  P0 = IC(2);
+  P2 = IC(2);
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I611(2, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 84 : 
+case 212 : 
+{ /* P0, P2 fixed at IC(0); HQ2X_MDR selects P1 and P3 as a pair: IC(0)/IC(0), else I31(0, 5)/I332(5, 7, 0) */
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P1 = IC(0);
+    P3 = IC(0);
+  } else {
+    P1 = I31(0, 5);
+    P3 = I332(5, 7, 0);
+  }
+} break;
+case 85 : 
+case 213 : 
+case 221 : 
+{ /* P0, P2 fixed at IC(1); HQ2X_MDR selects P1 and P3 as a pair: IC(1)/IC(1), else I31(1, 5)/I332(5, 7, 1) */
+  P0 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P1 = IC(1);
+    P3 = IC(1);
+  } else {
+    P1 = I31(1, 5);
+    P3 = I332(5, 7, 1);
+  }
+} break;
+case 87 : 
+{ /* P0, P2 fixed at IC(3); P3 tests HQ2X_MDR (else I611), P1 tests HQ2X_MUR (else I211) */
+  P0 = IC(3);
+  P2 = IC(3);
+  if (HQ2X_MDR) {
+    P3 = IC(3);
+  } else {
+    P3 = I611(3, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I211(3, 1, 5);
+  }
+} break;
+case 88 : 
+case 248 : 
+case 250 : 
+{ /* P0, P1 fixed at IC(0); P2 tests HQ2X_MDL (else I211), P3 tests HQ2X_MDR (else I211) */
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+} break;
+case 89 : 
+case 93 : 
+{ /* P0, P1 fixed at IC(1); P2 tests HQ2X_MDL (else I611), P3 tests HQ2X_MDR (else I611) */
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I611(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I611(1, 5, 7);
+  }
+} break;
+case 90 : 
+{ /* nothing fixed: each of P2/P3/P0/P1 is IC(0) when its predicate (MDL/MDR/MUL/MUR) holds, else an I611 value */
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 91 : 
+{ /* nothing fixed: IC(2) when the predicate holds; fallbacks are I611 for P2, P3, P1 and I211 for P0 */
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I611(2, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I611(2, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 92 : 
+{ /* P0, P1 fixed at IC(0); P2 tests HQ2X_MDL (else I611), P3 tests HQ2X_MDR (else I611) */
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+} break;
+case 94 : 
+{ /* nothing fixed: IC(0) when the predicate holds; fallbacks are I611 for P2, P3, P0 and I211 for P1 */
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 107 : 
+case 123 : 
+{ /* P1, P3 fixed at IC(2); P2 tests HQ2X_MDL (else I211), P0 tests HQ2X_MUL (else I211) */
+  P1 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I211(2, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 111 : 
+{ /* P1, P3 fixed at IC(4); P2 tests HQ2X_MDL (else I211), P0 tests HQ2X_MUL (else I1411) */
+  P1 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 112 : 
+case 240 : 
+{ /* P0, P1 fixed at IC(0); HQ2X_MDR selects P2 and P3 as a pair: IC(0)/IC(0), else I31(0, 7)/I332(5, 7, 0) */
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDR) {
+    P2 = IC(0);
+    P3 = IC(0);
+  } else {
+    P2 = I31(0, 7);
+    P3 = I332(5, 7, 0);
+  }
+} break;
+case 113 : 
+case 241 : 
+{ /* P0, P1 fixed at IC(1); HQ2X_MDR selects P2 and P3 as a pair: IC(1)/IC(1), else I31(1, 7)/I332(5, 7, 1) */
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDR) {
+    P2 = IC(1);
+    P3 = IC(1);
+  } else {
+    P2 = I31(1, 7);
+    P3 = I332(5, 7, 1);
+  }
+} break;
+case 114 : 
+{ /* P0, P2 fixed at IC(0); P3 tests HQ2X_MDR (else I611), P1 tests HQ2X_MUR (else I611) */
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 116 : 
+{ /* P0..P2 fixed at IC(0); P3 = HQ2X_MDR ? IC(0) : I611(0, 5, 7) */
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+} break;
+case 117 : 
+{ /* P0..P2 fixed at IC(1); P3 = HQ2X_MDR ? IC(1) : I611(1, 5, 7) */
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I611(1, 5, 7);
+  }
+} break;
+case 121 : 
+{ /* P0, P1 fixed at IC(1); P2 tests HQ2X_MDL (else I211), P3 tests HQ2X_MDR (else I611) */
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I211(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I611(1, 5, 7);
+  }
+} break;
+case 122 : 
+{ /* nothing fixed: IC(0) when the predicate holds; fallbacks are I211 for P2 and I611 for P3, P0, P1 */
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I611(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 126 : 
+{ /* P0, P3 fixed at IC(0); P2 tests HQ2X_MDL (else I211), P1 tests HQ2X_MUR (else I211) */
+  P0 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 127 : 
+{ /* only P3 fixed at IC(4); P2 tests HQ2X_MDL (else I211), P0 tests HQ2X_MUL (else I1411), P1 tests HQ2X_MUR (else I211) */
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I211(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I211(4, 1, 5);
+  }
+} break;
+case 146 : 
+case 150 : 
+case 178 : 
+case 182 : 
+case 190 : 
+{ /* P0, P2 fixed at IC(0); HQ2X_MUR selects P1 and P3 as a pair: IC(0)/IC(0), else I332(1, 5, 0)/I31(0, 5) */
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+    P3 = IC(0);
+  } else {
+    P1 = I332(1, 5, 0);
+    P3 = I31(0, 5);
+  }
+} break;
+case 147 : 
+case 179 : 
+{ /* P0, P2, P3 fixed at IC(2); P1 = HQ2X_MUR ? IC(2) : I611(2, 1, 5) */
+  P0 = IC(2);
+  P2 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MUR) {
+    P1 = IC(2);
+  } else {
+    P1 = I611(2, 1, 5);
+  }
+} break;
+case 151 : 
+case 183 : 
+{ /* P0, P2, P3 fixed at IC(3); P1 = HQ2X_MUR ? IC(3) : I1411(3, 1, 5) */
+  P0 = IC(3);
+  P2 = IC(3);
+  P3 = IC(3);
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I1411(3, 1, 5);
+  }
+} break;
+case 158 : 
+{ /* P2, P3 fixed at IC(0); P0 tests HQ2X_MUL (else I611), P1 tests HQ2X_MUR (else I211) */
+  P2 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 159 : 
+{ /* P2, P3 fixed at IC(4); P0 tests HQ2X_MUL (else I211), P1 tests HQ2X_MUR (else I1411) */
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 191 : 
+{ /* P2, P3 fixed at IC(4); P0 tests HQ2X_MUL (else I1411), P1 tests HQ2X_MUR (else I1411) */
+  P2 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 200 : 
+case 204 : 
+case 232 : 
+case 236 : 
+case 238 : 
+{ /* P0, P1 fixed at IC(0); HQ2X_MDL selects P2 and P3 as a pair: IC(0)/IC(0), else I332(3, 7, 0)/I31(0, 7) */
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+    P3 = IC(0);
+  } else {
+    P2 = I332(3, 7, 0);
+    P3 = I31(0, 7);
+  }
+} break;
+case 201 : 
+case 205 : 
+{ /* P0, P1, P3 fixed at IC(1); P2 = HQ2X_MDL ? IC(1) : I611(1, 3, 7) */
+  P0 = IC(1);
+  P1 = IC(1);
+  P3 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I611(1, 3, 7);
+  }
+} break;
+case 211 : 
+{ /* P0..P2 fixed at IC(2); P3 = HQ2X_MDR ? IC(2) : I211(2, 5, 7) */
+  P0 = IC(2);
+  P1 = IC(2);
+  P2 = IC(2);
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I211(2, 5, 7);
+  }
+} break;
+case 215 : 
+{ /* P0, P2 fixed at IC(3); P3 tests HQ2X_MDR (else I211), P1 tests HQ2X_MUR (else I1411) */
+  P0 = IC(3);
+  P2 = IC(3);
+  if (HQ2X_MDR) {
+    P3 = IC(3);
+  } else {
+    P3 = I211(3, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I1411(3, 1, 5);
+  }
+} break;
+case 218 : 
+{ /* nothing fixed: IC(0) when the predicate holds; fallbacks are I611 for P2, P0, P1 and I211 for P3 */
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 219 : 
+{ /* P1, P2 fixed at IC(2); P3 tests HQ2X_MDR (else I211), P0 tests HQ2X_MUL (else I211) */
+  P1 = IC(2);
+  P2 = IC(2);
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I211(2, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 220 : 
+{ /* P0, P1 fixed at IC(0); P2 tests HQ2X_MDL (else I611), P3 tests HQ2X_MDR (else I211) */
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I611(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+} break;
+case 223 : 
+{ /* only P2 fixed at IC(4); P3 tests HQ2X_MDR (else I211), P0 tests HQ2X_MUL (else I211), P1 tests HQ2X_MUR (else I1411) */
+  P2 = IC(4);
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I211(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I211(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
+case 233 : 
+case 237 : 
+{ /* P0, P1, P3 fixed at IC(1); P2 = HQ2X_MDL ? IC(1) : I1411(1, 3, 7) */
+  P0 = IC(1);
+  P1 = IC(1);
+  P3 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I1411(1, 3, 7);
+  }
+} break;
+case 234 : 
+{ /* P1, P3 fixed at IC(0); P2 tests HQ2X_MDL (else I211), P0 tests HQ2X_MUL (else I611) */
+  P1 = IC(0);
+  P3 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(0);
+  } else {
+    P0 = I611(0, 1, 3);
+  }
+} break;
+case 235 : 
+{ /* P1, P3 fixed at IC(2); P2 tests HQ2X_MDL (else I1411), P0 tests HQ2X_MUL (else I211) */
+  P1 = IC(2);
+  P3 = IC(2);
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I1411(2, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 239 : 
+{ /* P1, P3 fixed at IC(4); P2 tests HQ2X_MDL (else I1411), P0 tests HQ2X_MUL (else I1411) */
+  P1 = IC(4);
+  P3 = IC(4);
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+} break;
+case 242 : 
+{ /* P0, P2 fixed at IC(0); P3 tests HQ2X_MDR (else I211), P1 tests HQ2X_MUR (else I611) */
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I211(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I611(0, 1, 5);
+  }
+} break;
+case 243 : 
+{ /* P0, P1 fixed at IC(2); HQ2X_MDR selects P2 and P3 as a pair: IC(2)/IC(2), else I31(2, 7)/I332(5, 7, 2) */
+  P0 = IC(2);
+  P1 = IC(2);
+  if (HQ2X_MDR) {
+    P2 = IC(2);
+    P3 = IC(2);
+  } else {
+    P2 = I31(2, 7);
+    P3 = I332(5, 7, 2);
+  }
+} break;
+case 244 : 
+{ /* P0..P2 fixed at IC(0); P3 = HQ2X_MDR ? IC(0) : I1411(0, 5, 7) */
+  P0 = IC(0);
+  P1 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+} break;
+case 245 : 
+{ /* P0..P2 fixed at IC(1); P3 = HQ2X_MDR ? IC(1) : I1411(1, 5, 7) */
+  P0 = IC(1);
+  P1 = IC(1);
+  P2 = IC(1);
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I1411(1, 5, 7);
+  }
+} break;
+case 246 : 
+{ /* P0, P2 fixed at IC(0); P3 tests HQ2X_MDR (else I1411), P1 tests HQ2X_MUR (else I211) */
+  P0 = IC(0);
+  P2 = IC(0);
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 247 : 
+{ /* P0, P2 fixed at IC(3); P3 tests HQ2X_MDR (else I1411), P1 tests HQ2X_MUR (else I1411) */
+  P0 = IC(3);
+  P2 = IC(3);
+  if (HQ2X_MDR) {
+    P3 = IC(3);
+  } else {
+    P3 = I1411(3, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(3);
+  } else {
+    P1 = I1411(3, 1, 5);
+  }
+} break;
+case 249 : 
+{ /* P0, P1 fixed at IC(1); P2 tests HQ2X_MDL (else I1411), P3 tests HQ2X_MDR (else I211) */
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I1411(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I211(1, 5, 7);
+  }
+} break;
+case 251 : 
+{ /* only P1 fixed at IC(2); P2 tests HQ2X_MDL (else I1411), P3 tests HQ2X_MDR (else I211), P0 tests HQ2X_MUL (else I211) */
+  P1 = IC(2);
+  if (HQ2X_MDL) {
+    P2 = IC(2);
+  } else {
+    P2 = I1411(2, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(2);
+  } else {
+    P3 = I211(2, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(2);
+  } else {
+    P0 = I211(2, 1, 3);
+  }
+} break;
+case 252 : 
+{ /* P0, P1 fixed at IC(0); P2 tests HQ2X_MDL (else I211), P3 tests HQ2X_MDR (else I1411) */
+  P0 = IC(0);
+  P1 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+} break;
+case 253 : 
+{ /* P0, P1 fixed at IC(1); P2 tests HQ2X_MDL (else I1411), P3 tests HQ2X_MDR (else I1411) */
+  P0 = IC(1);
+  P1 = IC(1);
+  if (HQ2X_MDL) {
+    P2 = IC(1);
+  } else {
+    P2 = I1411(1, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(1);
+  } else {
+    P3 = I1411(1, 5, 7);
+  }
+} break;
+case 254 : 
+{ /* only P0 fixed at IC(0); P2 tests HQ2X_MDL (else I211), P3 tests HQ2X_MDR (else I1411), P1 tests HQ2X_MUR (else I211) */
+  P0 = IC(0);
+  if (HQ2X_MDL) {
+    P2 = IC(0);
+  } else {
+    P2 = I211(0, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(0);
+  } else {
+    P3 = I1411(0, 5, 7);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(0);
+  } else {
+    P1 = I211(0, 1, 5);
+  }
+} break;
+case 255 : 
+{ /* nothing fixed: each of P2/P3/P0/P1 is IC(4) when its predicate (MDL/MDR/MUL/MUR) holds, else an I1411 value */
+  if (HQ2X_MDL) {
+    P2 = IC(4);
+  } else {
+    P2 = I1411(4, 3, 7);
+  }
+  if (HQ2X_MDR) {
+    P3 = IC(4);
+  } else {
+    P3 = I1411(4, 5, 7);
+  }
+  if (HQ2X_MUL) {
+    P0 = IC(4);
+  } else {
+    P0 = I1411(4, 1, 3);
+  }
+  if (HQ2X_MUR) {
+    P1 = IC(4);
+  } else {
+    P1 = I1411(4, 1, 5);
+  }
+} break;
diff --git a/GLideNHQ/TxCache.cpp b/GLideNHQ/TxCache.cpp
new file mode 100644
index 00000000..a89e16f2
--- /dev/null
+++ b/GLideNHQ/TxCache.cpp
@@ -0,0 +1,449 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __MSC__
+#pragma warning(disable: 4786)
+#endif
+
+#include "TxCache.h"
+#include "TxDbg.h"
+#include <zlib.h>
+#include <boost/filesystem.hpp>
+
+TxCache::~TxCache()
+{
+  /* release every cached texture and reset the bookkeeping first */
+  clear();
+
+  /* then drop the utility object allocated by the constructor */
+  delete _txUtil;
+}
+
+TxCache::TxCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+                 dispInfoFuncExt callback)
+{
+  /* Set up an empty cache.
+   *  options   - option bit mask; GZ_TEXCACHE / GZ_HIRESTEXCACHE enable zlib
+   *  cachesize - size limit in bytes; eviction in add() is only active when > 0
+   *  path      - optional path name, copied into _path
+   *  ident     - optional ROM name, copied into _ident
+   *  callback  - progress reporting function used by load()
+   */
+  _txUtil = new TxUtil();
+
+  _options = options;
+  _cacheSize = cachesize;
+  _callback = callback;
+  _totalSize = 0;
+
+  /* The zlib scratch buffers must always hold a defined value: get() consults
+   * them for any entry flagged GR_TEXFMT_GZ, even when compression is off. */
+  _gzdest0 = NULL;
+  _gzdest1 = NULL;
+  _gzdestLen = 0;
+
+  /* save path name */
+  if (path)
+    _path.assign(path);
+
+  /* save ROM name */
+  if (ident)
+    _ident.assign(ident);
+
+  /* zlib memory buffers to (de)compress hires textures */
+  if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) {
+    _gzdest0   = TxMemBuf::getInstance()->get(0);
+    _gzdest1   = TxMemBuf::getInstance()->get(1);
+    /* usable length is the smaller of the two buffers */
+    _gzdestLen = (TxMemBuf::getInstance()->size_of(0) < TxMemBuf::getInstance()->size_of(1)) ?
+                  TxMemBuf::getInstance()->size_of(0) : TxMemBuf::getInstance()->size_of(1);
+
+    /* without both buffers compression cannot work; switch it off */
+    if (!_gzdest0 || !_gzdest1 || !_gzdestLen) {
+      _options &= ~(GZ_TEXCACHE|GZ_HIRESTEXCACHE);
+      _gzdest0 = NULL;
+      _gzdest1 = NULL;
+      _gzdestLen = 0;
+    }
+  }
+}
+
+boolean
+TxCache::add(uint64 checksum, GHQTexInfo *info, int dataSize)
+{
+  /* Store a private copy of a texture under `checksum`.
+   * Returns 1 when the texture was stored, 0 otherwise (invalid arguments,
+   * checksum already cached, unknown size, or out of memory).
+   *
+   * NOTE: dataSize must be provided if info->data is zlib compressed.
+   */
+
+  if (!checksum || !info || !info->data) return 0;
+
+  /* std::map::insert() leaves an existing entry untouched, so adding the same
+   * checksum twice would leak the new copy, leave a stale _cachelist node and
+   * inflate _totalSize. Refuse duplicates up front. */
+  if (_cache.find(checksum) != _cache.end()) return 0;
+
+  uint8 *dest = info->data;
+  uint16 format = info->format;
+
+  if (!dataSize) {
+    dataSize = _txUtil->sizeofTx(info->width, info->height, info->format);
+
+    if (!dataSize) return 0;
+
+    if (_options & (GZ_TEXCACHE|GZ_HIRESTEXCACHE)) {
+      /* zlib compress it. compression level:1 (best speed) */
+      uint32 destLen = _gzdestLen;
+      /* pick the scratch buffer that is not the source */
+      dest = (dest == _gzdest0) ? _gzdest1 : _gzdest0;
+      if (compress2(dest, &destLen, info->data, dataSize, 1) != Z_OK) {
+        /* fall back to storing the data uncompressed */
+        dest = info->data;
+        DBG_INFO(80, L"Error: zlib compression failed!\n");
+      } else {
+        DBG_INFO(80, L"zlib compressed: %.02fkb->%.02fkb\n", (float)dataSize/1000, (float)destLen/1000);
+        dataSize = destLen;
+        format |= GR_TEXFMT_GZ;
+      }
+    }
+  }
+
+  /* if cache size exceeds limit, remove old cache */
+  if (_cacheSize > 0) {
+    /* account for the new entry temporarily while deciding how much to evict */
+    _totalSize += dataSize;
+    if ((_totalSize > _cacheSize) && !_cachelist.empty()) {
+      /* _cachelist is arranged so that frequently used textures are in the back */
+      std::list<uint64>::iterator itList = _cachelist.begin();
+      while (itList != _cachelist.end()) {
+        /* find it in _cache */
+        std::map<uint64, TXCACHE*>::iterator itMap = _cache.find(*itList);
+        if (itMap != _cache.end()) {
+          /* yep we have it. remove it. */
+          _totalSize -= (*itMap).second->size;
+          free((*itMap).second->info.data);
+          delete (*itMap).second;
+          _cache.erase(itMap);
+        }
+        itList++;
+
+        /* check if memory cache has enough space */
+        if (_totalSize <= _cacheSize)
+          break;
+      }
+      /* remove from _cachelist */
+      _cachelist.erase(_cachelist.begin(), itList);
+
+      DBG_INFO(80, L"+++++++++\n");
+    }
+    /* the entry is only counted for real once it has been stored below */
+    _totalSize -= dataSize;
+  }
+
+  /* cache it */
+  uint8 *tmpdata = (uint8*)malloc(dataSize);
+  if (tmpdata) {
+    TXCACHE *txCache = new TXCACHE;
+    if (txCache) {
+      /* we can directly write as we filter, but for now we get away
+       * with doing memcpy after all the filtering is done.
+       */
+      memcpy(tmpdata, dest, dataSize);
+
+      /* copy it */
+      memcpy(&txCache->info, info, sizeof(GHQTexInfo));
+      txCache->info.data = tmpdata;
+      txCache->info.format = format;
+      txCache->size = dataSize;
+
+      /* add to cache */
+      if (_cacheSize > 0) {
+        _cachelist.push_back(checksum);
+        txCache->it = --(_cachelist.end());
+      }
+      /* _cache[checksum] = txCache; */
+      _cache.insert(std::map<uint64, TXCACHE*>::value_type(checksum, txCache));
+
+#ifdef DEBUG
+      DBG_INFO(80, L"[%5d] added!! crc:%08X %08X %d x %d gfmt:%x total:%.02fmb\n",
+               _cache.size(), (uint32)(checksum >> 32), (uint32)(checksum & 0xffffffff),
+               info->width, info->height, info->format, (float)_totalSize/1000000);
+
+      DBG_INFO(80, L"smalllodlog2:%d largelodlog2:%d aspectratiolog2:%d\n",
+               txCache->info.smallLodLog2, txCache->info.largeLodLog2, txCache->info.aspectRatioLog2);
+
+      if (info->tiles) {
+        DBG_INFO(80, L"tiles:%d un-tiled size:%d x %d\n", info->tiles, info->untiled_width, info->untiled_height);
+      }
+
+      if (_cacheSize > 0) {
+        DBG_INFO(80, L"cache max config:%.02fmb\n", (float)_cacheSize/1000000);
+
+        if (_cache.size() != _cachelist.size()) {
+          DBG_INFO(80, L"Error: cache/cachelist mismatch! (%d/%d)\n", _cache.size(), _cachelist.size());
+        }
+      }
+#endif
+
+      /* total cache size */
+      _totalSize += dataSize;
+
+      return 1;
+    }
+    free(tmpdata);
+  }
+
+  return 0;
+}
+
+boolean
+TxCache::get(uint64 checksum, GHQTexInfo *info)
+{
+  /* Look up `checksum`; on a hit copy the stored description into *info,
+   * mark the entry as most recently used and hand back uncompressed data.
+   * Returns 1 on a hit, 0 on a miss or when decompression fails.
+   */
+  if (!checksum || _cache.empty()) return 0;
+
+  std::map<uint64, TXCACHE*>::iterator hit = _cache.find(checksum);
+  if (hit == _cache.end()) return 0;
+
+  TXCACHE *entry = hit->second;
+
+  /* hand out a copy of the stored texture description */
+  memcpy(info, &entry->info, sizeof(GHQTexInfo));
+
+  /* move the checksum to the back of the usage list */
+  if (_cacheSize > 0) {
+    _cachelist.erase(entry->it);
+    _cachelist.push_back(checksum);
+    entry->it = --(_cachelist.end());
+  }
+
+  /* stored compressed? inflate into whichever scratch buffer is not the source */
+  if (info->format & GR_TEXFMT_GZ) {
+    uint32 destLen = _gzdestLen;
+    uint8 *dest = (_gzdest0 == info->data) ? _gzdest1 : _gzdest0;
+    if (uncompress(dest, &destLen, info->data, entry->size) != Z_OK) {
+      DBG_INFO(80, L"Error: zlib decompression failed!\n");
+      return 0;
+    }
+    info->data = dest;
+    info->format &= ~GR_TEXFMT_GZ;
+    DBG_INFO(80, L"zlib decompressed: %.02fkb->%.02fkb\n", (float)(entry->size)/1000, (float)destLen/1000);
+  }
+
+  return 1;
+}
+
+boolean
+TxCache::save(const wchar_t *path, const wchar_t *filename, int config)
+{
+  /* Dump every cached texture into the gzip file `filename` inside directory
+   * `path` (created if missing). `config` is written as a 4 byte header so
+   * load() can reject files produced with different settings.
+   * Returns the result of _cache.empty(), i.e. nonzero when there was nothing
+   * to save.
+   */
+  if (!_cache.empty()) {
+    /* dump cache to disk */
+    char cbuf[MAX_PATH];
+
+    boost::filesystem::wpath cachepath(path);
+    boost::filesystem::create_directory(cachepath);
+
+    /* Ugly hack to enable fopen/gzopen in Win9x */
+#ifdef WIN32
+    wchar_t curpath[MAX_PATH];
+    GETCWD(MAX_PATH, curpath);
+    CHDIR(cachepath.wstring().c_str());
+#else
+    char curpath[MAX_PATH];
+    /* wcstombs() needs the wide form of the path, same as the WIN32 branch */
+    wcstombs(cbuf, cachepath.wstring().c_str(), MAX_PATH);
+    cbuf[MAX_PATH - 1] = '\0'; /* wcstombs() does not terminate on truncation */
+    GETCWD(MAX_PATH, curpath);
+    CHDIR(cbuf);
+#endif
+
+    wcstombs(cbuf, filename, MAX_PATH);
+    cbuf[MAX_PATH - 1] = '\0'; /* wcstombs() does not terminate on truncation */
+
+    gzFile gzfp = gzopen(cbuf, "wb1");
+    DBG_INFO(80, L"gzfp:%x file:%ls\n", gzfp, filename);
+    if (gzfp) {
+      /* write header to determine config match */
+      gzwrite(gzfp, &config, 4);
+
+      std::map<uint64, TXCACHE*>::iterator itMap = _cache.begin();
+      while (itMap != _cache.end()) {
+        uint8 *dest    = (*itMap).second->info.data;
+        uint32 destLen = (*itMap).second->size;
+        uint16 format  = (*itMap).second->info.format;
+
+        /* to keep things simple, we save the texture data in a zlib uncompressed state. */
+        /* sigh... for those who cannot wait the extra few seconds. changed to keep
+         * texture data in a zlib compressed state. if the GZ_TEXCACHE or GZ_HIRESTEXCACHE
+         * option is toggled, the cache will need to be rebuilt.
+         */
+        /*if (format & GR_TEXFMT_GZ) {
+          dest = _gzdest0;
+          destLen = _gzdestLen;
+          if (dest && destLen) {
+            if (uncompress(dest, &destLen, (*itMap).second->info.data, (*itMap).second->size) != Z_OK) {
+              dest = NULL;
+              destLen = 0;
+            }
+            format &= ~GR_TEXFMT_GZ;
+          }
+        }*/
+
+        if (dest && destLen) {
+          /* texture checksum */
+          gzwrite(gzfp, &((*itMap).first), 8);
+
+          /* other texture info */
+          gzwrite(gzfp, &((*itMap).second->info.width), 4);
+          gzwrite(gzfp, &((*itMap).second->info.height), 4);
+          gzwrite(gzfp, &format, 2);
+
+          gzwrite(gzfp, &((*itMap).second->info.smallLodLog2), 4);
+          gzwrite(gzfp, &((*itMap).second->info.largeLodLog2), 4);
+          gzwrite(gzfp, &((*itMap).second->info.aspectRatioLog2), 4);
+
+          gzwrite(gzfp, &((*itMap).second->info.tiles), 4);
+          gzwrite(gzfp, &((*itMap).second->info.untiled_width), 4);
+          gzwrite(gzfp, &((*itMap).second->info.untiled_height), 4);
+
+          gzwrite(gzfp, &((*itMap).second->info.is_hires_tex), 1);
+
+          /* payload length followed by the payload itself */
+          gzwrite(gzfp, &destLen, 4);
+          gzwrite(gzfp, dest, destLen);
+        }
+
+        itMap++;
+
+        /* not ready yet */
+        /*if (_callback)
+          (*_callback)(L"Total textures saved to HDD: %d\n", std::distance(itMap, _cache.begin()));*/
+      }
+      gzclose(gzfp);
+    }
+
+    /* go back to the directory we started in */
+    CHDIR(curpath);
+  }
+
+  return _cache.empty();
+}
+
+boolean
+TxCache::load(const wchar_t *path, const wchar_t *filename, int config)
+{
+  /* Read the gzip cache file `filename` from directory `path` and add() every
+   * record to the memory cache. The file is only used when its 4 byte header
+   * equals `config`. Reading stops at the first incomplete record.
+   * Returns nonzero when the memory cache is not empty afterwards.
+   */
+
+  /* find it on disk */
+  char cbuf[MAX_PATH];
+
+  boost::filesystem::wpath cachepath(path);
+
+#ifdef WIN32
+  wchar_t curpath[MAX_PATH];
+  GETCWD(MAX_PATH, curpath);
+  CHDIR(cachepath.wstring().c_str());
+#else
+  char curpath[MAX_PATH];
+  /* wcstombs() needs the wide form of the path, same as the WIN32 branch */
+  wcstombs(cbuf, cachepath.wstring().c_str(), MAX_PATH);
+  cbuf[MAX_PATH - 1] = '\0'; /* wcstombs() does not terminate on truncation */
+  GETCWD(MAX_PATH, curpath);
+  CHDIR(cbuf);
+#endif
+
+  wcstombs(cbuf, filename, MAX_PATH);
+  cbuf[MAX_PATH - 1] = '\0'; /* wcstombs() does not terminate on truncation */
+
+  gzFile gzfp = gzopen(cbuf, "rb");
+  DBG_INFO(80, L"gzfp:%x file:%ls\n", gzfp, filename);
+  if (gzfp) {
+    /* yep, we have it. load it into memory cache. */
+    int dataSize = 0;
+    uint64 checksum = 0;
+    GHQTexInfo tmpInfo;
+    int tmpconfig = 0;
+
+    /* read header to determine config match */
+    if (gzread(gzfp, &tmpconfig, 4) == 4 && tmpconfig == config) {
+      bool more = true;
+      while (more) {
+        memset(&tmpInfo, 0, sizeof(GHQTexInfo));
+
+        /* Record header, in the order save() writes it. A short read means
+         * end of file or a truncated file; gzeof() alone can report "not yet"
+         * after the last complete record has been consumed. */
+        bool ok =
+          gzread(gzfp, &checksum, 8) == 8 &&
+          gzread(gzfp, &tmpInfo.width, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.height, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.format, 2) == 2 &&
+          gzread(gzfp, &tmpInfo.smallLodLog2, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.largeLodLog2, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.aspectRatioLog2, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.tiles, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.untiled_width, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.untiled_height, 4) == 4 &&
+          gzread(gzfp, &tmpInfo.is_hires_tex, 1) == 1 &&
+          gzread(gzfp, &dataSize, 4) == 4 &&
+          dataSize > 0;
+
+        if (ok) {
+          tmpInfo.data = (uint8*)malloc(dataSize);
+          if (tmpInfo.data) {
+            if (gzread(gzfp, tmpInfo.data, dataSize) == dataSize) {
+              /* add to memory cache */
+              add(checksum, &tmpInfo, (tmpInfo.format & GR_TEXFMT_GZ) ? dataSize : 0);
+            } else {
+              /* payload cut short: drop the record and stop reading */
+              ok = false;
+            }
+
+            free(tmpInfo.data);
+          } else {
+            /* out of memory for this record: skip its payload */
+            gzseek(gzfp, dataSize, SEEK_CUR);
+          }
+        }
+
+        more = ok && !gzeof(gzfp);
+
+        /* skip in between to prevent the loop from being tied down to vsync */
+        if (_callback && (!(_cache.size() % 100) || !more))
+          (*_callback)(L"[%d] total mem:%.02fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, filename);
+      }
+    }
+
+    /* always release the handle, also when the config header did not match */
+    gzclose(gzfp);
+  }
+
+  /* go back to the directory we started in */
+  CHDIR(curpath);
+
+  return !_cache.empty();
+}
+
+boolean
+TxCache::del(uint64 checksum)
+{
+  /* Remove the entry stored under `checksum`.
+   * Returns 1 when an entry was removed, 0 when there was none.
+   */
+  if (!checksum || _cache.empty()) return 0;
+
+  std::map<uint64, TXCACHE*>::iterator found = _cache.find(checksum);
+  if (found == _cache.end()) return 0;
+
+  TXCACHE *entry = found->second;
+
+  /* for texture cache (not hi-res cache) */
+  if (!_cachelist.empty()) _cachelist.erase(entry->it);
+
+  /* release the stored data and forget the entry */
+  _totalSize -= entry->size;
+  free(entry->info.data);
+  delete entry;
+  _cache.erase(found);
+
+  DBG_INFO(80, L"removed from cache: checksum = %08X %08X\n", (uint32)(checksum & 0xffffffff), (uint32)(checksum >> 32));
+
+  return 1;
+}
+
+boolean
+TxCache::is_cached(uint64 checksum)
+{
+  /* 1 when an entry exists for `checksum`, 0 otherwise */
+  return (_cache.find(checksum) != _cache.end()) ? 1 : 0;
+}
+
+void
+TxCache::clear()
+{
+  /* Free every stored texture, then empty the map, the usage list and the
+   * size counter. */
+  typedef std::map<uint64, TXCACHE*>::iterator CacheIter;
+
+  for (CacheIter entry = _cache.begin(); entry != _cache.end(); ++entry) {
+    free(entry->second->info.data);
+    delete entry->second;
+  }
+  _cache.clear();
+
+  _cachelist.clear();
+
+  _totalSize = 0;
+}
diff --git a/GLideNHQ/TxCache.h b/GLideNHQ/TxCache.h
new file mode 100644
index 00000000..0b31b544
--- /dev/null
+++ b/GLideNHQ/TxCache.h
@@ -0,0 +1,69 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXCACHE_H__
+#define __TXCACHE_H__
+
+#include "TxInternal.h"
+#include "TxUtil.h"
+#include <list>
+#include <map>
+#include <string>
+
+/* In-memory texture cache keyed by a 64 bit checksum, with optional zlib
+ * compression of the stored data and optional size-limited eviction.
+ */
+class TxCache
+{
+private:
+  /* checksums in usage order; add() and get() push to the back,
+   * eviction in add() starts at the front */
+  std::list<uint64> _cachelist;
+  /* zlib scratch buffers, taken from TxMemBuf slots 0 and 1 by the constructor */
+  uint8 *_gzdest0;
+  uint8 *_gzdest1;
+  /* usable scratch length: the smaller of the two TxMemBuf buffer sizes */
+  uint32 _gzdestLen;
+protected:
+  int _options;              /* option bit mask passed to the constructor */
+  std::wstring _ident;       /* ROM name */
+  std::wstring _path;        /* path name */
+  dispInfoFuncExt _callback; /* progress callback, called from load() */
+  TxUtil *_txUtil;           /* created in the constructor, deleted in the destructor */
+  /* one cached texture */
+  struct TXCACHE {
+    int size;                /* bytes stored in info.data (compressed size if GR_TEXFMT_GZ is set) */
+    GHQTexInfo info;         /* copy of the texture description; info.data is malloc'ed by add() */
+    std::list<uint64>::iterator it; /* position in _cachelist; only set when _cacheSize > 0 */
+  };
+  int _totalSize;            /* sum of TXCACHE::size over all entries */
+  int _cacheSize;            /* size limit; eviction is only active when > 0 */
+  std::map<uint64, TXCACHE*> _cache;
+  /* write all entries to a gzip file; returns nonzero when the cache is empty */
+  boolean save(const wchar_t *path, const wchar_t *filename, const int config);
+  /* read entries from a gzip file; returns nonzero when the cache is not empty afterwards */
+  boolean load(const wchar_t *path, const wchar_t *filename, const int config);
+  boolean del(uint64 checksum); /* checksum hi:palette low:texture */
+  boolean is_cached(uint64 checksum); /* checksum hi:palette low:texture */
+  /* free every entry and reset _totalSize */
+  void clear();
+public:
+  ~TxCache();
+  TxCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+              dispInfoFuncExt callback);
+  /* store a copy of the texture; dataSize must be given when info->data is
+   * already zlib compressed */
+  boolean add(uint64 checksum, /* checksum hi:palette low:texture */
+              GHQTexInfo *info, int dataSize = 0);
+  /* fetch a texture into *info, decompressing it if necessary */
+  boolean get(uint64 checksum, /* checksum hi:palette low:texture */
+              GHQTexInfo *info);
+};
+
+#endif /* __TXCACHE_H__ */
diff --git a/GLideNHQ/TxDbg.cpp b/GLideNHQ/TxDbg.cpp
new file mode 100644
index 00000000..a54cba4c
--- /dev/null
+++ b/GLideNHQ/TxDbg.cpp
@@ -0,0 +1,75 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DBG_LEVEL 80
+
+#include "TxDbg.h"
+#include <string.h>
+#include <stdarg.h>
+#include <boost/format.hpp>
+
+TxDbg::TxDbg()
+{
+  _level = DBG_LEVEL;
+
+  /* Open the log file unconditionally. The previous "if (!_dbgfile)" test read
+   * the member before anything had been assigned to it, which only worked
+   * because the single instance is a zero-initialized static.
+   * fopen() may fail and leave _dbgfile NULL. */
+#ifdef GHQCHK
+  _dbgfile = fopen("ghqchk.txt", "w");
+#else
+  _dbgfile = fopen("glidehq.dbg", "w");
+#endif
+}
+
+TxDbg::~TxDbg()
+{
+  /* close the log file, if one was opened */
+  if (_dbgfile != 0) {
+    fclose(_dbgfile);
+    _dbgfile = 0;
+  }
+
+  /* put the level back to its default */
+  _level = DBG_LEVEL;
+}
+
+void
+TxDbg::output(const int level, const wchar_t *format, ...)
+{
+  /* printf-style logging: messages whose level is above _level are dropped,
+   * everything else is written to the log file prefixed with "<level>:\t".
+   * With GHQCHK defined the message is echoed to stdout as well.
+   */
+  if (level > _level)
+    return;
+
+  /* prepend the level to the caller's format string */
+  std::wstring newformat = boost::str(boost::wformat(L"%d:\t%ls") % level % format);
+
+  va_list args;
+
+  /* the log file is missing when fopen() failed in the constructor */
+  if (_dbgfile) {
+    va_start(args, format);
+    vfwprintf(_dbgfile, newformat.c_str(), args);
+    va_end(args);
+    fflush(_dbgfile);
+  }
+
+#ifdef GHQCHK
+  /* a va_list must not be reused after a v*printf call consumed it,
+   * so start a fresh one for the second pass */
+  va_start(args, format);
+  vwprintf(newformat.c_str(), args);
+  va_end(args);
+#endif
+}
diff --git a/GLideNHQ/TxDbg.h b/GLideNHQ/TxDbg.h
new file mode 100644
index 00000000..0a64d6ac
--- /dev/null
+++ b/GLideNHQ/TxDbg.h
@@ -0,0 +1,61 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXDBG_H__
+#define __TXDBG_H__
+
+#include <stdio.h>
+#include "TxInternal.h"
+
+/* Debug logger, available only through getInstance(). */
+class TxDbg
+{
+private:
+  FILE* _dbgfile; /* log file opened by the constructor */
+  int _level;     /* output() drops messages whose level is above this */
+  TxDbg();        /* private: instances are only created by getInstance() */
+public:
+  /* returns the single instance, a function-local static */
+  static TxDbg* getInstance() {
+	static TxDbg txDbg;
+	return &txDbg;
+  }
+  ~TxDbg();
+  /* printf-style logging of a wide-character message at the given level */
+  void output(const int level, const wchar_t *format, ...);
+};
+
+#ifdef DEBUG
+#define DBG_INFO TxDbg::getInstance()->output
+#define INFO DBG_INFO
+#else
+#define DBG_INFO(A, ...)
+#ifdef GHQCHK
+#define INFO TxDbg::getInstance()->output
+#else
+#if 0 /* XXX enable this to log basic hires texture checks */
+#define INFO TxDbg::getInstance()->output
+#else
+#define INFO DBG_INFO
+#endif
+#endif
+#endif
+
+#endif /* __TXDBG_H__ */
diff --git a/GLideNHQ/TxFilter.cpp b/GLideNHQ/TxFilter.cpp
new file mode 100644
index 00000000..58b44648
--- /dev/null
+++ b/GLideNHQ/TxFilter.cpp
@@ -0,0 +1,683 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __MSC__
+#pragma warning(disable: 4786)
+#endif
+
+#include "TxFilter.h"
+#include "TextureFilters.h"
+#include "TxDbg.h"
+#include "bldno.h"
+#include <boost/thread.hpp>
+#include <boost/bind.hpp>
+#include <boost/format.hpp>
+
+void TxFilter::clear()
+{
+  /* Release everything the filter owns. Each pointer is reset after its
+   * delete so that calling clear() again (the destructor does) is harmless
+   * even for members that were not re-created in between. */
+
+  /* clear hires texture cache */
+  delete _txHiResCache;
+  _txHiResCache = NULL;
+
+  /* clear texture cache */
+  delete _txTexCache;
+  _txTexCache = NULL;
+
+  /* free memory */
+  TxMemBuf::getInstance()->shutdown();
+
+  /* clear other stuff */
+  delete _txImage;
+  _txImage = NULL;
+  delete _txQuantize;
+  _txQuantize = NULL;
+  delete _txUtil;
+  _txUtil = NULL;
+}
+
+/* Releases the caches, the shared TxMemBuf buffers and the helper objects
+ * by delegating to clear(). */
+TxFilter::~TxFilter()
+{
+  clear();
+}
+
+TxFilter::TxFilter(int maxwidth, int maxheight, int maxbpp, int options,
+                   int cachesize, wchar_t *path, wchar_t *ident,
+                   dispInfoFuncExt callback)
+{
+  /* Set up the texture filter.
+   *  maxwidth/maxheight - largest texture size to produce (clamped to 1024)
+   *  maxbpp             - maximum bits per pixel
+   *  options            - filter/enhancement/compression option bit mask
+   *  cachesize          - texture cache size in bytes
+   *  path, ident        - path name and ROM name handed on to the caches
+   *  callback           - progress reporting function handed on to the caches
+   * _initialized is set only when both work buffers could be obtained.
+   */
+
+  /* A freshly constructed object holds indeterminate pointers. Give every
+   * owning pointer a defined value before clear() (below, and again from the
+   * destructor) gets to delete it; _txHiResCache in particular is not
+   * assigned at all when HIRES_TEXTURE is disabled. */
+  _txHiResCache = NULL;
+  _txTexCache   = NULL;
+  _txImage      = NULL;
+  _txQuantize   = NULL;
+  _txUtil       = NULL;
+  _tex1 = NULL;
+  _tex2 = NULL;
+  _initialized = 0;
+
+  /* HACKALERT: the emulator misbehaves and sometimes forgets to shutdown */
+  if ((ident && wcscmp(ident, L"DEFAULT") != 0 && _ident.compare(ident) == 0) &&
+      _maxwidth  == maxwidth  &&
+      _maxheight == maxheight &&
+      _maxbpp    == maxbpp    &&
+      _options   == options   &&
+      _cacheSize == cachesize) return;
+  clear(); /* gcc does not allow the destructor to be called */
+
+  /* shamelessness :P this first call to the debug output message creates
+   * a file in the executable directory. */
+  INFO(0, L"------------------------------------------------------------------\n");
+#ifdef GHQCHK
+  INFO(0, L" GlideHQ Hires Texture Checker 1.02.00.%d\n", BUILD_NUMBER);
+#else
+  INFO(0, L" GlideHQ version 1.02.00.%d\n", BUILD_NUMBER);
+#endif
+  INFO(0, L" Copyright (C) 2010  Hiroshi Morii   All Rights Reserved\n");
+  INFO(0, L"    email   : koolsmoky(at)users.sourceforge.net\n");
+  INFO(0, L"    website : http://www.3dfxzone.it/koolsmoky\n");
+  INFO(0, L"\n");
+  INFO(0, L" Glide64 official website : http://glide64.emuxhaven.net\n");
+  INFO(0, L"------------------------------------------------------------------\n");
+
+  _options = options;
+
+  _txImage      = new TxImage();
+  _txQuantize   = new TxQuantize();
+  _txUtil       = new TxUtil();
+
+  /* get number of CPU cores. */
+  _numcore = _txUtil->getNumberofProcessors();
+
+  _initialized = 0;
+
+  _tex1 = NULL;
+  _tex2 = NULL;
+
+  /* XXX: anything larger than 1024 * 1024 is overkill */
+  _maxwidth  = maxwidth  > 1024 ? 1024 : maxwidth;
+  _maxheight = maxheight > 1024 ? 1024 : maxheight;
+  _maxbpp    = maxbpp;
+
+  _cacheSize = cachesize;
+
+  /* TODO: validate options and do overrides here*/
+
+  /* save path name */
+  if (path)
+    _path.assign(path);
+
+  /* save ROM name */
+  if (ident && wcscmp(ident, L"DEFAULT") != 0)
+    _ident.assign(ident);
+
+  /* check for dxtn extensions */
+  if (!TxLoadLib::getInstance()->getdxtCompressTexFuncExt())
+    _options &= ~S3TC_COMPRESSION;
+
+  if (!TxLoadLib::getInstance()->getfxtCompressTexFuncExt())
+    _options &= ~FXT1_COMPRESSION;
+
+  /* only FXT1 and S3TC compression are kept; anything else is switched off */
+  switch (options & COMPRESSION_MASK) {
+  case FXT1_COMPRESSION:
+  case S3TC_COMPRESSION:
+    break;
+  case NCC_COMPRESSION:
+  default:
+    _options &= ~COMPRESSION_MASK;
+  }
+
+  /* work buffers shared through TxMemBuf */
+  if (TxMemBuf::getInstance()->init(_maxwidth, _maxheight)) {
+    if (!_tex1)
+      _tex1 = TxMemBuf::getInstance()->get(0);
+
+    if (!_tex2)
+      _tex2 = TxMemBuf::getInstance()->get(1);
+  }
+
+#if !_16BPP_HACK
+  /* initialize hq4x filter */
+  hq4x_init();
+#endif
+
+  /* initialize texture cache in bytes. 128Mb will do nicely in most cases */
+  _txTexCache = new TxTexCache(_options, _cacheSize, _path.c_str(), _ident.c_str(), callback);
+
+  /* hires texture */
+#if HIRES_TEXTURE
+  _txHiResCache = new TxHiResCache(_maxwidth, _maxheight, _maxbpp, _options, _path.c_str(), _ident.c_str(), callback);
+
+  if (_txHiResCache->empty())
+    _options &= ~HIRESTEXTURES_MASK;
+#endif
+
+  if (!(_options & COMPRESS_TEX))
+    _options &= ~COMPRESSION_MASK;
+
+  /* usable only when both work buffers are available */
+  if (_tex1 && _tex2)
+      _initialized = 1;
+}
+
+boolean
+TxFilter::filter(uint8 *src, int srcwidth, int srcheight, uint16 srcformat, uint64 g64crc, GHQTexInfo *info)
+{
+  uint8 *texture = src;
+  uint8 *tmptex = _tex1;
+  uint16 destformat = srcformat;
+
+  /* We need to be initialized first! */
+  if (!_initialized) return 0;
+
+  /* find cached textures */
+  if (_cacheSize) {
+
+    /* calculate checksum of source texture */
+    if (!g64crc)
+      g64crc = (uint64)(_txUtil->checksumTx(texture, srcwidth, srcheight, srcformat));
+
+    DBG_INFO(80, L"filter: crc:%08X %08X %d x %d gfmt:%x\n",
+             (uint32)(g64crc >> 32), (uint32)(g64crc & 0xffffffff), srcwidth, srcheight, srcformat);
+
+#if 0 /* use hirestex to retrieve cached textures. */
+    /* check if we have it in cache */
+    if (!(g64crc & 0xffffffff00000000) && /* we reach here only when there is no hires texture for this crc */
+        _txTexCache->get(g64crc, info)) {
+      DBG_INFO(80, L"cache hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+      return 1; /* yep, we've got it */
+    }
+#endif
+  }
+
+  /* Leave small textures alone because filtering makes little difference.
+   * Moreover, some filters require at least 4 * 4 to work.
+   * Bypass _options to do ARGB8888->16bpp if _maxbpp=16 or forced color reduction.
+   */
+  if ((srcwidth >= 4 && srcheight >= 4) &&
+      ((_options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESSION_MASK)) ||
+       (srcformat == GR_TEXFMT_ARGB_8888 && (_maxbpp < 32 || _options & FORCE16BPP_TEX)))) {
+
+#if !_16BPP_HACK
+    /* convert textures to a format that the compressor accepts (ARGB8888) */
+    if (_options & COMPRESSION_MASK) {
+#endif
+      if (srcformat != GR_TEXFMT_ARGB_8888) {
+        if (!_txQuantize->quantize(texture, tmptex, srcwidth, srcheight, srcformat, GR_TEXFMT_ARGB_8888)) {
+          DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", srcformat);
+          return 0;
+        }
+        texture = tmptex;
+        destformat = GR_TEXFMT_ARGB_8888;
+      }
+#if !_16BPP_HACK
+    }
+#endif
+
+    switch (destformat) {
+    case GR_TEXFMT_ARGB_8888:
+
+      /*
+       * prepare texture enhancements (x2, x4 scalers)
+       */
+      int scale_shift = 0, num_filters = 0;
+      uint32 filter = 0;
+
+      if ((_options & ENHANCEMENT_MASK) == HQ4X_ENHANCEMENT) {
+        if (srcwidth  <= (_maxwidth >> 2) && srcheight <= (_maxheight >> 2)) {
+          filter |= HQ4X_ENHANCEMENT;
+          scale_shift = 2;
+          num_filters++;
+        } else if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          filter |= HQ2X_ENHANCEMENT;
+          scale_shift = 1;
+          num_filters++;
+        }
+      } else if (_options & ENHANCEMENT_MASK) {
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          filter |= (_options & ENHANCEMENT_MASK);
+          scale_shift = 1;
+          num_filters++;
+        }
+      }
+
+      /*
+       * prepare texture filters
+       */
+      if (_options & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK)) {
+        filter |= (_options & (SMOOTH_FILTER_MASK|SHARP_FILTER_MASK));
+        num_filters++;
+      }
+
+      /*
+       * execute texture enhancements and filters
+       */
+      while (num_filters > 0) {
+
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+
+        uint8 *_texture = texture;
+        uint8 *_tmptex  = tmptex;
+
+        unsigned int numcore = _numcore;
+        unsigned int blkrow = 0;
+        while (numcore > 1 && blkrow == 0) {
+          blkrow = (srcheight >> 2) / numcore;
+          numcore--;
+        }
+        if (blkrow > 0 && numcore > 1) {
+          boost::thread *thrd[MAX_NUMCORE];
+          unsigned int i;
+          int blkheight = blkrow << 2;
+          unsigned int srcStride = (srcwidth * blkheight) << 2;
+          unsigned int destStride = srcStride << scale_shift << scale_shift;
+          for (i = 0; i < numcore - 1; i++) {
+            thrd[i] = new boost::thread(boost::bind(filter_8888,
+                                                    (uint32*)_texture,
+                                                    srcwidth,
+                                                    blkheight,
+                                                    (uint32*)_tmptex,
+                                                    filter));
+            _texture += srcStride;
+            _tmptex  += destStride;
+          }
+          thrd[i] = new boost::thread(boost::bind(filter_8888,
+                                                  (uint32*)_texture,
+                                                  srcwidth,
+                                                  srcheight - blkheight * i,
+                                                  (uint32*)_tmptex,
+                                                  filter));
+          for (i = 0; i < numcore; i++) {
+            thrd[i]->join();
+            delete thrd[i];
+          }
+        } else {
+          filter_8888((uint32*)_texture, srcwidth, srcheight, (uint32*)_tmptex, filter);
+        }
+
+        if (filter & ENHANCEMENT_MASK) {
+          srcwidth  <<= scale_shift;
+          srcheight <<= scale_shift;
+          filter &= ~ENHANCEMENT_MASK;
+          scale_shift = 0;
+        }
+
+        texture = tmptex;
+        num_filters--;
+      }
+
+      /*
+       * texture compression
+       */
+      /* ignored if we only have texture compression option on.
+       * only done when texture enhancer is used. see constructor. */
+      if ((_options & COMPRESSION_MASK) &&
+          (srcwidth >= 64 && srcheight >= 64) /* Texture compression is not suitable for low pixel coarse detail
+                                               * textures. The assumption here is that textures larger than 64x64
+                                               * have enough detail to produce decent quality when compressed. The
+                                               * down side is that narrow stripped textures that the N64 often use
+                                               * for large background textures are also ignored. It would be more
+                                               * reasonable if decisions are made based on fourier-transform
+                                               * spectrum or RMS error.
+                                               */
+          ) {
+        int compressionType = _options & COMPRESSION_MASK;
+        int tmpwidth, tmpheight;
+        uint16 tmpformat;
+        /* XXX: textures that use 8bit alpha channel look bad with the current
+         * fxt1 library, so we substitute it with dxtn for now. afaik all gfx
+         * cards that support fxt1 also support dxtn. (3dfx and Intel) */
+        if ((destformat == GR_TEXFMT_ALPHA_INTENSITY_88) ||
+            (destformat == GR_TEXFMT_ARGB_8888) ||
+            (destformat == GR_TEXFMT_ALPHA_8)) {
+          compressionType = S3TC_COMPRESSION;
+        }
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+        if (_txQuantize->compress(texture, tmptex,
+                                  srcwidth, srcheight, srcformat,
+                                  &tmpwidth, &tmpheight, &tmpformat,
+                                  compressionType)) {
+          srcwidth = tmpwidth;
+          srcheight = tmpheight;
+          destformat = tmpformat;
+          texture = tmptex;
+        }
+      }
+
+
+      /*
+       * texture (re)conversions
+       */
+      if (destformat == GR_TEXFMT_ARGB_8888) {
+        if (srcformat == GR_TEXFMT_ARGB_8888 && (_maxbpp < 32 || _options & FORCE16BPP_TEX)) srcformat = GR_TEXFMT_ARGB_4444;
+        if (srcformat != GR_TEXFMT_ARGB_8888) {
+          tmptex = (texture == _tex1) ? _tex2 : _tex1;
+          if (!_txQuantize->quantize(texture, tmptex, srcwidth, srcheight, GR_TEXFMT_ARGB_8888, srcformat)) {
+            DBG_INFO(80, L"Error: unsupported format! gfmt:%x\n", srcformat);
+            return 0;
+          }
+          texture = tmptex;
+          destformat = srcformat;
+        }
+      }
+
+      break;
+#if !_16BPP_HACK
+    case GR_TEXFMT_ARGB_4444:
+
+      int scale_shift = 0;
+      tmptex = (texture == _tex1) ? _tex2 : _tex1;
+
+      switch (_options & ENHANCEMENT_MASK) {
+      case HQ4X_ENHANCEMENT:
+        if (srcwidth <= (_maxwidth >> 2) && srcheight <= (_maxheight >> 2)) {
+          hq4x_4444((uint8*)texture, (uint8*)tmptex, srcwidth, srcheight, srcwidth, srcwidth * 4 * 2);
+          scale_shift = 2;
+        }/* else if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          hq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }*/
+        break;
+      case HQ2X_ENHANCEMENT:
+        if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          hq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case HQ2XS_ENHANCEMENT:
+        if (srcwidth <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          hq2xS_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case LQ2X_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          lq2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case LQ2XS_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          lq2xS_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+        break;
+      case X2SAI_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          Super2xSaI_4444((uint16*)texture, (uint16*)tmptex, srcwidth, srcheight, srcwidth);
+          scale_shift = 1;
+        }
+        break;
+      case X2_ENHANCEMENT:
+        if (srcwidth  <= (_maxwidth >> 1) && srcheight <= (_maxheight >> 1)) {
+          Texture2x_16((uint8*)texture, srcwidth * 2, (uint8*)tmptex, srcwidth * 2 * 2, srcwidth, srcheight);
+          scale_shift = 1;
+        }
+      }
+      if (scale_shift) {
+        srcwidth <<= scale_shift;
+        srcheight <<= scale_shift;
+        texture = tmptex;
+      }
+
+      if (_options & SMOOTH_FILTER_MASK) {
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+        SmoothFilter_4444((uint16*)texture, srcwidth, srcheight, (uint16*)tmptex, (_options & SMOOTH_FILTER_MASK));
+        texture = tmptex;
+      } else if (_options & SHARP_FILTER_MASK) {
+        tmptex = (texture == _tex1) ? _tex2 : _tex1;
+        SharpFilter_4444((uint16*)texture, srcwidth, srcheight, (uint16*)tmptex, (_options & SHARP_FILTER_MASK));
+        texture = tmptex;
+      }
+
+      break;
+    case GR_TEXFMT_ARGB_1555:
+      break;
+    case GR_TEXFMT_RGB_565:
+      break;
+    case GR_TEXFMT_ALPHA_8:
+      break;
+#endif /* _16BPP_HACK */
+    }
+  }
+
+  /* fill in the texture info. */
+  info->data = texture;
+  info->width  = srcwidth;
+  info->height = srcheight;
+  info->format = destformat;
+  info->smallLodLog2 = _txUtil->grLodLog2(srcwidth, srcheight);
+  info->largeLodLog2 = info->smallLodLog2;
+  info->aspectRatioLog2 = _txUtil->grAspectRatioLog2(srcwidth, srcheight);
+  info->is_hires_tex = 0;
+
+  /* cache the texture. */
+  if (_cacheSize) _txTexCache->add(g64crc, info);
+
+  DBG_INFO(80, L"filtered texture: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+
+  return 1;
+}
+
+boolean
+TxFilter::hirestex(uint64 g64crc, uint64 r_crc64, uint16 *palette, GHQTexInfo *info)
+{
+  /* Look up a ready-made texture for the given checksums and fill in *info.
+   * With HIRES_TEXTURE: the hires cache is tried with the full r_crc64, then
+   * with its low 32 bits only (P8 hits are expanded with the palette). Last,
+   * the regular texture cache is tried with g64crc. Returns 1 on a hit, else 0.
+   * NOTE: Rice CRC32 sometimes returns the same value for different textures.
+   * As a workaround, Glide64 CRC32 is used as the key for the NON-hires
+   * texture cache.
+   *   r_crc64 = hi:palette low:texture (separate crc; need not be the rice crc)
+   *   g64crc  = texture + palette glide64 crc32 (can be any other robust crc)
+   */
+  DBG_INFO(80, L"hirestex: r_crc64:%08X %08X, g64crc:%08X %08X\n",
+           (uint32)(r_crc64 >> 32), (uint32)(r_crc64 & 0xffffffff),
+           (uint32)(g64crc >> 32), (uint32)(g64crc & 0xffffffff));
+
+#if HIRES_TEXTURE
+  /* check if we have it in hires memory cache. */
+  if ((_options & HIRESTEXTURES_MASK) && r_crc64) {
+    if (_txHiResCache->get(r_crc64, info)) {
+      DBG_INFO(80, L"hires hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+
+      /* TODO: Enable emulation for special N64 combiner modes. There are a few
+       * ways to get this done. Also applies to CI textures below.
+       *
+       * Solution 1. Load the high-resolution textures in ARGB8888 (or A8, IA88)
+       * format to cache. When a cache is hit, take the modes passed in from
+       * Glide64 (also TODO) and apply the modification. Then do color reduction,
+       * format conversion or compression if desired and put the result into the
+       * non-hires texture cache.
+       *
+       * Solution 2. When a cache is hit and if the combiner modes are present,
+       * convert the texture to ARGB4444 and pass it back to Glide64 to process.
+       * If a texture is compressed, it needs to be decompressed first. Then add
+       * the processed texture to the non-hires texture cache.
+       *
+       * Solution 3. Hybrid of the above 2. Load the textures in ARGB8888 (A8, IA88)
+       * format. Convert the texture to ARGB4444 and pass it back to Glide64 when
+       * the combiner modes are present. Get the processed texture back from Glide64
+       * and compress if desired and add it to the non-hires texture cache.
+       *
+       * Solution 4. Take the easy way out and forget about this whole thing.
+       */
+
+      return 1; /* yep, got it */
+    }
+    if (_txHiResCache->get((r_crc64 & 0xffffffff), info)) {
+      DBG_INFO(80, L"hires hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+
+      /* for true CI textures, we use the passed in palette to convert to
+       * ARGB1555 and add the result to the hires memory cache.
+       *
+       * NOTE: we do this AFTER all other hires cache searches because
+       * only a few texture packs actually use true CI textures.
+       *
+       * NOTE: the pre-converted palette from Glide64 is in RGBA5551 format.
+       * The A component comes before the RGB components.
+       */
+      if (palette && info->format == GR_TEXFMT_P_8) {
+        DBG_INFO(80, L"found GR_TEXFMT_P_8 format. Need conversion!!\n");
+
+        int width = info->width;
+        int height = info->height;
+        uint16 format = info->format;
+        /* XXX: avoid collision with zlib compression buffer in TxHiResTexture::get */
+        uint8 *texture = info->data;
+        uint8 *tmptex = (texture == _tex1) ? _tex2 : _tex1;
+
+        /* use palette and convert to 16bit format */
+        _txQuantize->P8_16BPP((uint32*)texture, (uint32*)tmptex, info->width, info->height, (uint32*)palette);
+        texture = tmptex;
+        format = GR_TEXFMT_ARGB_1555;
+
+#if 1
+        /* XXX: compress if memory cache compression is ON */
+        if (_options & COMPRESSION_MASK) {
+          tmptex = (texture == _tex1) ? _tex2 : _tex1;
+          if (_txQuantize->quantize(texture, tmptex, info->width, info->height, format, GR_TEXFMT_ARGB_8888)) {
+            texture = tmptex;
+            format = GR_TEXFMT_ARGB_8888;
+          }
+          if (format == GR_TEXFMT_ARGB_8888) {
+            tmptex = (texture == _tex1) ? _tex2 : _tex1;
+            if (_txQuantize->compress(texture, tmptex,
+                                      info->width, info->height, GR_TEXFMT_ARGB_1555,
+                                      &width, &height, &format,
+                                      _options & COMPRESSION_MASK)) {
+              texture = tmptex;
+            } else {
+              /* NOTE(review): the explicit ARGB8888 -> ARGB1555 re-quantize that
+               * used to live here is disabled. tmptex is the buffer that received
+               * the P8_16BPP output above, so this presumably relies on compress()
+               * leaving its destination untouched on failure -- TODO confirm. */
+              texture = tmptex;
+              format = GR_TEXFMT_ARGB_1555;
+            }
+          }
+        }
+#endif
+
+        /* fill in the required info to return */
+        info->data = texture;
+        info->width = width;
+        info->height = height;
+        info->format = format;
+        info->smallLodLog2 = _txUtil->grLodLog2(width, height);
+        info->largeLodLog2 = info->smallLodLog2;
+        info->aspectRatioLog2 = _txUtil->grAspectRatioLog2(width, height);
+        info->is_hires_tex = 1;
+
+        /* XXX: add to hires texture cache!!! */
+        _txHiResCache->add(r_crc64, info);
+
+        DBG_INFO(80, L"GR_TEXFMT_P_8 loaded as gfmt:%x!\n", format);
+      }
+
+      return 1;
+    }
+  }
+#endif
+
+  /* check if we have it in memory cache */
+  if (_cacheSize && g64crc) {
+    if (_txTexCache->get(g64crc, info)) {
+      DBG_INFO(80, L"cache hit: %d x %d gfmt:%x\n", info->width, info->height, info->format);
+      return 1; /* yep, we've got it */
+    }
+  }
+
+  DBG_INFO(80, L"no cache hits.\n");
+
+  return 0;
+}
+
+uint64
+TxFilter::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  /* The checksum is computed only while hires textures or texture dumping
+   * are enabled in _options; otherwise 0 is reported. */
+  const bool wanted = (_options & (HIRESTEXTURES_MASK|DUMP_TEX)) != 0;
+  return wanted ? _txUtil->checksum64(src, width, height, size, rowStride, palette) : 0;
+}
+
+boolean
+TxFilter::dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64)
+{
+  /* Dump a texture as PNG to <_path>/texture_dump/<_ident>/GlideHQ/. Returns 1
+   * once the file was opened and passed to writePNG (its result is not
+   * checked); 0 if uninitialized, DUMP_TEX is off, the conversion fails,
+   * _path/_ident is empty, or a directory or the file cannot be created. */
+  if (!_initialized || !(_options & DUMP_TEX))
+    return 0;
+  DBG_INFO(80, L"gfmt = %02x n64fmt = %02x\n", gfmt, n64fmt);
+  DBG_INFO(80, L"hirestex: r_crc64:%08X %08X\n",
+           (uint32)(r_crc64 >> 32), (uint32)(r_crc64 & 0xffffffff));
+
+  /* convert to ARGB8888 into _tex1; the PNG is written from that buffer */
+  if (!_txQuantize->quantize(src, _tex1, rowStridePixel, height, (gfmt & 0x00ff), GR_TEXFMT_ARGB_8888))
+    return 0;
+  src = _tex1;
+
+  if (!_path.empty() && !_ident.empty()) {
+    FILE *fp = NULL;
+    std::wstring tmpbuf;
+
+    /* create directories */
+    tmpbuf.assign(_path + L"/texture_dump");
+    if (!boost::filesystem::exists(tmpbuf) &&
+        !boost::filesystem::create_directory(tmpbuf))
+      return 0;
+
+    tmpbuf.append(L"/" + _ident);
+    if (!boost::filesystem::exists(tmpbuf) &&
+        !boost::filesystem::create_directory(tmpbuf))
+      return 0;
+
+    tmpbuf.append(L"/GlideHQ");
+    if (!boost::filesystem::exists(tmpbuf) &&
+        !boost::filesystem::create_directory(tmpbuf))
+      return 0;
+
+    if ((n64fmt >> 8) == 0x2) {
+      tmpbuf.append(boost::str(boost::wformat(L"/%ls#%08X#%01X#%01X#%08X_ciByRGBA.png")
+                               % _ident.c_str() % (uint32)(r_crc64 & 0xffffffff) % (n64fmt >> 8) % (n64fmt & 0xf) % (uint32)(r_crc64 >> 32)));
+    } else {
+      tmpbuf.append(boost::str(boost::wformat(L"/%ls#%08X#%01X#%01X_all.png")
+                               % _ident.c_str() % (uint32)(r_crc64 & 0xffffffff) % (n64fmt >> 8) % (n64fmt & 0xf)));
+    }
+
+#ifdef WIN32
+    if ((fp = _wfopen(tmpbuf.c_str(), L"wb")) != NULL) {
+#else
+    /* reject conversion failures ((size_t)-1) and paths that filled cbuf without a terminator */
+    char cbuf[MAX_PATH];
+    const size_t cbuflen = wcstombs(cbuf, tmpbuf.c_str(), MAX_PATH);
+    if (cbuflen < (size_t)MAX_PATH && (fp = fopen(cbuf, "wb")) != NULL) {
+#endif
+      _txImage->writePNG(src, fp, width, height, (rowStridePixel << 2), 0x0003, 0);
+      fclose(fp);
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+boolean
+TxFilter::reloadhirestex()
+{
+  /* Reload the hires cache via load(0). On success HIRESTEXTURES_MASK in
+   * _options is cleared when the cache came back empty and set otherwise.
+   * Returns 1 when the reload succeeded, 0 when it did not. */
+  DBG_INFO(80, L"Reload hires textures from texture pack.\n");
+  if (!_txHiResCache->load(0))
+    return 0;
+
+  _options = _txHiResCache->empty() ? (_options & ~HIRESTEXTURES_MASK)
+                                    : (_options | HIRESTEXTURES_MASK);
+  return 1;
+}
diff --git a/GLideNHQ/TxFilter.h b/GLideNHQ/TxFilter.h
new file mode 100644
index 00000000..fdbd0268
--- /dev/null
+++ b/GLideNHQ/TxFilter.h
@@ -0,0 +1,81 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXFILTER_H__
+#define __TXFILTER_H__
+
+#include "TxInternal.h"
+#include "TxQuantize.h"
+#include "TxHiResCache.h"
+#include "TxTexCache.h"
+#include "TxUtil.h"
+#include "TxImage.h"
+#include <string>
+/* Texture filtering front end; the txfilter_* C entry points forward to one instance. */
+class TxFilter
+{
+private:
+  int _numcore;
+
+  uint8 *_tex1; /* scratch buffers: each processing step writes into whichever */
+  uint8 *_tex2; /* of _tex1/_tex2 is not the current source */
+  int _maxwidth;  /* enhancement is skipped when the scaled width would exceed this */
+  int _maxheight; /* same limit for the height */
+  int _maxbpp;    /* below 32, ARGB8888 results are reduced to ARGB4444 in filter() */
+  int _options;   /* bit flags (enhancement, smooth/sharp filter, compression, hires, dump) */
+  int _cacheSize; /* non-zero enables the _txTexCache lookups and inserts */
+  std::wstring _ident; /* with _path, forms <_path>/texture_dump/<_ident>/GlideHQ in dmptx() */
+  std::wstring _path;
+  TxQuantize *_txQuantize;
+  TxTexCache *_txTexCache;
+  TxHiResCache *_txHiResCache;
+  TxUtil *_txUtil;
+  TxImage *_txImage;
+  boolean _initialized; /* dmptx() returns 0 while this is not set */
+  void clear();
+public:
+  ~TxFilter();
+  TxFilter(int maxwidth,
+           int maxheight,
+           int maxbpp,
+           int options,
+           int cachesize,
+           wchar_t *path,
+           wchar_t *ident,
+           dispInfoFuncExt callback);
+  boolean filter(uint8 *src, /* returns 1 with *info filled in, 0 on unsupported format */
+                  int srcwidth,
+                  int srcheight,
+                  uint16 srcformat,
+                  uint64 g64crc, /* glide64 crc, 64bit for future use */
+                  GHQTexInfo *info);
+  boolean hirestex(uint64 g64crc, /* glide64 crc, 64bit for future use */
+                      uint64 r_crc64,   /* checksum hi:palette low:texture */
+                      uint16 *palette,  /* used to expand GR_TEXFMT_P_8 hits; may be NULL */
+                      GHQTexInfo *info); /* filled in on a cache hit (return value 1) */
+  uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette); /* 0 unless hires or dump is enabled */
+  boolean dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64); /* writes a PNG dump; 1 on success */
+  boolean reloadhirestex(); /* reloads the hires cache and updates HIRESTEXTURES_MASK in _options */
+};
+
+#endif /* __TXFILTER_H__ */
diff --git a/GLideNHQ/TxFilterExport.cpp b/GLideNHQ/TxFilterExport.cpp
new file mode 100644
index 00000000..e9b30f84
--- /dev/null
+++ b/GLideNHQ/TxFilterExport.cpp
@@ -0,0 +1,106 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __MSC__
+#pragma warning(disable: 4786)
+#endif
+
+#include "TxFilter.h"
+
+TxFilter *txFilter = NULL;
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+TAPI boolean TAPIENTRY
+txfilter_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize,
+              wchar_t *path, wchar_t*ident,
+              dispInfoFuncExt callback)
+{
+  /* Create the single global TxFilter. Returns 0 if one already exists,
+   * otherwise 1 once the instance has been allocated. */
+  if (txFilter != NULL)
+    return 0;
+  txFilter = new TxFilter(maxwidth, maxheight, maxbpp, options, cachesize, path, ident, callback);
+  return (txFilter != NULL) ? 1 : 0;
+}
+
+TAPI void TAPIENTRY
+txfilter_shutdown(void)
+{
+  /* Destroy the global filter (deleting NULL is a no-op) and mark it gone. */
+  delete txFilter;
+  txFilter = NULL;
+}
+
+TAPI boolean TAPIENTRY
+txfilter(uint8 *src, int srcwidth, int srcheight, uint16 srcformat,
+         uint64 g64crc, GHQTexInfo *info)
+{
+  /* C entry point for TxFilter::filter(); yields 0 while no global
+   * filter instance exists. */
+  if (txFilter == NULL)
+    return 0;
+  return txFilter->filter(src, srcwidth, srcheight, srcformat, g64crc, info);
+}
+
+TAPI boolean TAPIENTRY
+txfilter_hirestex(uint64 g64crc, uint64 r_crc64, uint16 *palette, GHQTexInfo *info)
+{
+  /* Forward to TxFilter::hirestex(); 0 when no filter instance exists. */
+  if (txFilter == NULL)
+    return 0;
+  return txFilter->hirestex(g64crc, r_crc64, palette, info);
+}
+
+TAPI uint64 TAPIENTRY
+txfilter_checksum(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  /* Forward to TxFilter::checksum64(); 0 when no filter instance exists. */
+  if (txFilter == NULL)
+    return 0;
+  return txFilter->checksum64(src, width, height, size, rowStride, palette);
+}
+
+TAPI boolean TAPIENTRY
+txfilter_dmptx(uint8 *src, int width, int height, int rowStridePixel, uint16 gfmt, uint16 n64fmt, uint64 r_crc64)
+{
+  /* Forward to TxFilter::dmptx(); 0 when no filter instance exists. */
+  if (txFilter == NULL)
+    return 0;
+  return txFilter->dmptx(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
+}
+
+TAPI boolean TAPIENTRY
+txfilter_reloadhirestex()
+{
+  /* Forward to TxFilter::reloadhirestex(); 0 when no filter instance exists. */
+  if (txFilter == NULL)
+    return 0;
+  return txFilter->reloadhirestex();
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/GLideNHQ/TxHiResCache.cpp b/GLideNHQ/TxHiResCache.cpp
new file mode 100644
index 00000000..a23a7254
--- /dev/null
+++ b/GLideNHQ/TxHiResCache.cpp
@@ -0,0 +1,1080 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* 2007 Gonetz <gonetz(at)ngs.ru>
+ * Added callback to display hires texture info. */
+
+#ifdef __MSC__
+#pragma warning(disable: 4786)
+#endif
+
+/* dump processed hirestextures to disk
+ * (0:disable, 1:enable) */
+#define DUMP_CACHE 1
+
+/* handle oversized textures by
+ *   0: minification
+ *   1: Glide64 style tiling
+ */
+#define TEXTURE_TILING 1
+
+/* use power of 2 texture size
+ * (0:disable, 1:enable, 2:3dfx) */
+#define POW2_TEXTURES 2
+
+#if TEXTURE_TILING
+#undef POW2_TEXTURES
+#define POW2_TEXTURES 2
+#endif
+
+/* hack to reduce texture footprint to achieve
+ * better performance on midrange gfx cards.
+ * (0:disable, 1:enable) */
+#define REDUCE_TEXTURE_FOOTPRINT 0
+
+/* use aggressive format assumption for quantization
+ * (0:disable, 1:enable, 2:extreme) */
+#define AGGRESSIVE_QUANTIZATION 1
+
+#include "TxHiResCache.h"
+#include "TxDbg.h"
+#include <zlib.h>
+#include <string>
+
+TxHiResCache::~TxHiResCache()
+{
+#if DUMP_CACHE
+  /* Save the cache to <_path>/cache/<_ident>_HIRESTEXTURES.dat, but only when
+   * dumping is enabled, the contents were not restored from such a dump,
+   * and loading was not aborted. */
+  const bool dumpToDisk = (_options & DUMP_HIRESTEXCACHE) && !_haveCache && !_abortLoad;
+  if (dumpToDisk) {
+	const std::wstring cacheFile = _ident + L"_HIRESTEXTURES.dat";
+	boost::filesystem::wpath cacheDir(_path);
+	cacheDir /= boost::filesystem::wpath(L"cache");
+	const int config = _options & (HIRESTEXTURES_MASK|COMPRESS_HIRESTEX|COMPRESSION_MASK|TILE_HIRESTEX|FORCE16BPP_HIRESTEX|GZ_HIRESTEXCACHE|LET_TEXARTISTS_FLY);
+	TxCache::save(cacheDir.wstring().c_str(), cacheFile.c_str(), config);
+  }
+#endif
+  delete _txImage;
+  delete _txQuantize;
+  delete _txReSample;
+}
+
+TxHiResCache::TxHiResCache(int maxwidth, int maxheight, int maxbpp, int options,
+						   const wchar_t *path, const wchar_t *ident,
+						   dispInfoFuncExt callback
+						   ) : TxCache((options & ~GZ_TEXCACHE), 0, path, ident, callback)
+{
+  /* Set up the image/quantize/resample helpers, then fill the cache: first
+   * from the on-disk dump when DUMP_HIRESTEXCACHE is set, and from the
+   * texture pack itself (load(0)) when no dump could be restored. */
+  _txImage = new TxImage();
+  _txQuantize  = new TxQuantize();
+  _txReSample = new TxReSample();
+  _maxwidth  = maxwidth;
+  _maxheight = maxheight;
+  _maxbpp    = maxbpp;
+  _abortLoad = 0;
+  _haveCache = 0;
+
+  /* the compression bits are dropped unless COMPRESS_HIRESTEX is set */
+  if ((_options & COMPRESS_HIRESTEX) == 0)
+	_options &= ~COMPRESSION_MASK;
+
+  /* without both a path and an ident nothing is loaded or dumped */
+  if (_path.empty() || _ident.empty()) {
+	_options &= ~DUMP_HIRESTEXCACHE;
+	return;
+  }
+
+#if DUMP_CACHE
+  if (_options & DUMP_HIRESTEXCACHE) {
+	/* try <_path>/cache/<_ident>_HIRESTEXTURES.dat */
+	const std::wstring cacheFile = _ident + L"_HIRESTEXTURES.dat";
+	boost::filesystem::wpath cacheDir(_path);
+	cacheDir /= boost::filesystem::wpath(L"cache");
+	const int config = _options & (HIRESTEXTURES_MASK|COMPRESS_HIRESTEX|COMPRESSION_MASK|TILE_HIRESTEX|FORCE16BPP_HIRESTEX|GZ_HIRESTEXCACHE|LET_TEXARTISTS_FLY);
+	_haveCache = TxCache::load(cacheDir.wstring().c_str(), cacheFile.c_str(), config);
+  }
+#endif
+
+  if (_haveCache == 0) TxHiResCache::load(0);
+}
+
+boolean
+TxHiResCache::empty()
+{
+  return _cache.empty(); /* non-zero when _cache holds no entries */
+}
+
+boolean
+TxHiResCache::load(boolean replace) /* 0 : reload, 1 : replace partial */
+{
+  /* Fill the cache from the texture pack. With replace == 0 the current
+   * contents are dropped first. Only the Rice layout is actually read
+   * (<_path>/hires_texture/<_ident>); the GHQ and JABO selections load
+   * nothing. Returns 0 when path or ident is empty, 1 otherwise -- the
+   * result of loadHiResTextures() is not reflected in the return value. */
+  if (_path.empty() || _ident.empty())
+    return 0;
+
+  if (!replace)
+    TxCache::clear();
+
+  boost::filesystem::wpath texdir(_path);
+  switch (_options & HIRESTEXTURES_MASK) {
+  case RICE_HIRESTEXTURES:
+    INFO(80, L"-----\n");
+    INFO(80, L"using Rice hires texture format...\n");
+    INFO(80, L"  must be one of the following;\n");
+    INFO(80, L"    1) *_rgb.png + *_a.png\n");
+    INFO(80, L"    2) *_all.png\n");
+    INFO(80, L"    3) *_ciByRGBA.png\n");
+    INFO(80, L"    4) *_allciByRGBA.png\n");
+    INFO(80, L"    5) *_ci.bmp\n");
+    INFO(80, L"  usage of only 2) and 3) highly recommended!\n");
+    INFO(80, L"  folder names must be in US-ASCII characters!\n");
+    texdir /= boost::filesystem::wpath(L"hires_texture");
+    texdir /= boost::filesystem::wpath(_ident);
+    loadHiResTextures(texdir, replace);
+    break;
+  case GHQ_HIRESTEXTURES:
+  case JABO_HIRESTEXTURES:
+    break; /* nothing is read for these selections */
+  }
+  return 1;
+}
+
+boolean
+TxHiResCache::loadHiResTextures(boost::filesystem::wpath dir_path, boolean replace)
+{
+  DBG_INFO(80, L"-----\n");
+  DBG_INFO(80, L"path: %ls\n", dir_path.string().c_str());
+
+  /* find it on disk */
+  if (!boost::filesystem::exists(dir_path)) {
+	INFO(80, L"Error: path not found!\n");
+	return 0;
+  }
+
+  /* XXX: deal with UNICODE fiasco!
+   * stupidity flows forth beneath this...
+   *
+   * I opted to use chdir in order to use fopen() for windows 9x.
+   */
+#ifdef WIN32
+  wchar_t curpath[MAX_PATH];
+  GETCWD(MAX_PATH, curpath);
+  CHDIR(dir_path.wstring().c_str());
+#else
+  char curpath[MAX_PATH];
+  char cbuf[MAX_PATH];
+  wcstombs(cbuf, dir_path.string().c_str(), MAX_PATH);
+  GETCWD(MAX_PATH, curpath);
+  CHDIR(cbuf);
+#endif
+
+  /* NOTE: I could use the boost::wdirectory_iterator and boost::wpath
+   * to resolve UNICODE file names and paths. But then, _wfopen() is
+   * required to get the file descriptor for MS Windows to pass into
+   * libpng, which is incompatible with Win9x. Win9x's fopen() cannot
+   * handle UNICODE names. UNICODE capable boost::filesystem is available
+   * with Boost1.34.1 built with VC8.0 (bjam --toolset=msvc-8.0 stage).
+   *
+   * RULE OF THUMB: NEVER save texture packs in NON-ASCII names!!
+   */
+  boost::filesystem::directory_iterator it(dir_path);
+  boost::filesystem::directory_iterator end_it; /* default construction yields past-the-end */
+
+  for (; it != end_it; ++it) {
+
+	if (KBHIT(0x1B)) {
+	  _abortLoad = 1;
+	  if (_callback) (*_callback)(L"Aborted loading hiresolution texture!\n");
+	  INFO(80, L"Error: aborted loading hiresolution texture!\n");
+	}
+	if (_abortLoad) break;
+
+	/* recursive read into sub-directory */
+	if (boost::filesystem::is_directory(it->status())) {
+	  loadHiResTextures(it->path(), replace);
+	  continue;
+	}
+
+	DBG_INFO(80, L"-----\n");
+	DBG_INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+
+	int width = 0, height = 0;
+	uint16 format = 0;
+	uint8 *tex = NULL;
+	int tmpwidth = 0, tmpheight = 0;
+	uint16 tmpformat = 0;
+	uint8 *tmptex= NULL;
+	int untiled_width = 0, untiled_height = 0;
+	uint16 destformat = 0;
+
+	/* Rice hi-res textures: begin
+	 */
+	uint32 chksum = 0, fmt = 0, siz = 0, palchksum = 0;
+	char *pfname = NULL, fname[MAX_PATH];
+	std::string ident;
+	FILE *fp = NULL;
+
+	wcstombs(fname, _ident.c_str(), MAX_PATH);
+	/* XXX case sensitivity fiasco!
+	 * files must use _a, _rgb, _all, _allciByRGBA, _ciByRGBA, _ci
+	 * and file extensions must be in lower case letters! */
+#ifdef WIN32
+	{
+	  unsigned int i;
+	  for (i = 0; i < strlen(fname); i++) fname[i] = tolower(fname[i]);
+	}
+#endif
+	ident.assign(fname);
+
+	/* read in Rice's file naming convention */
+#define CRCFMTSIZ_LEN 13
+#define PALCRC_LEN 9
+	wcstombs(fname, it->path().leaf().c_str(), MAX_PATH);
+	/* XXX case sensitivity fiasco!
+	 * files must use _a, _rgb, _all, _allciByRGBA, _ciByRGBA, _ci
+	 * and file extensions must be in lower case letters! */
+#ifdef WIN32
+	{
+	  unsigned int i;
+	  for (i = 0; i < strlen(fname); i++) fname[i] = tolower(fname[i]);
+	}
+#endif
+	pfname = fname + strlen(fname) - 4;
+	if (!(pfname == strstr(fname, ".png") ||
+		  pfname == strstr(fname, ".bmp") ||
+		  pfname == strstr(fname, ".dds"))) {
+#if !DEBUG
+	  INFO(80, L"-----\n");
+	  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+	  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+	  INFO(80, L"Error: not png or bmp or dds!\n");
+	  continue;
+	}
+	pfname = strstr(fname, ident.c_str());
+	if (pfname != fname) pfname = 0;
+	if (pfname) {
+	  if (sscanf(pfname + ident.size(), "#%08X#%01X#%01X#%08X", &chksum, &fmt, &siz, &palchksum) == 4)
+		pfname += (ident.size() + CRCFMTSIZ_LEN + PALCRC_LEN);
+	  else if (sscanf(pfname + ident.size(), "#%08X#%01X#%01X", &chksum, &fmt, &siz) == 3)
+		pfname += (ident.size() + CRCFMTSIZ_LEN);
+	  else
+		pfname = 0;
+	}
+	if (!pfname) {
+#if !DEBUG
+	  INFO(80, L"-----\n");
+	  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+	  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+	  INFO(80, L"Error: not Rice texture naming convention!\n");
+	  continue;
+	}
+	if (!chksum) {
+#if !DEBUG
+	  INFO(80, L"-----\n");
+	  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+	  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+	  INFO(80, L"Error: crc32 = 0!\n");
+	  continue;
+	}
+
+	/* check if we already have it in hires texture cache */
+	if (!replace) {
+	  uint64 chksum64 = (uint64)palchksum;
+	  chksum64 <<= 32;
+	  chksum64 |= (uint64)chksum;
+	  if (TxCache::is_cached(chksum64)) {
+#if !DEBUG
+		INFO(80, L"-----\n");
+		INFO(80, L"path: %ls\n", dir_path.string().c_str());
+		INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+		INFO(80, L"Error: already cached! duplicate texture!\n");
+		continue;
+	  }
+	}
+
+	DBG_INFO(80, L"rom: %ls chksum:%08X %08X fmt:%x size:%x\n", _ident.c_str(), chksum, palchksum, fmt, siz);
+
+	/* Deal with the wackiness some texture packs utilize Rice format.
+	 * Read in the following order: _a.* + _rgb.*, _all.png _ciByRGBA.png,
+	 * _allciByRGBA.png, and _ci.bmp. PNG are prefered over BMP.
+	 *
+	 * For some reason there are texture packs that include them all. Some
+	 * even have RGB textures named as _all.* and ARGB textures named as
+	 * _rgb.*... Someone pleeeez write a GOOD guideline for the texture
+	 * designers!!!
+	 *
+	 * We allow hires textures to have higher bpp than the N64 originals.
+	 */
+	/* N64 formats
+	 * Format: 0 - RGBA, 1 - YUV, 2 - CI, 3 - IA, 4 - I
+	 * Size:   0 - 4bit, 1 - 8bit, 2 - 16bit, 3 - 32 bit
+	 */
+
+	/*
+	 * read in _rgb.* and _a.*
+	 */
+	if (pfname == strstr(fname, "_rgb.") || pfname == strstr(fname, "_a.")) {
+	  strcpy(pfname, "_rgb.png");
+	  if (!boost::filesystem::exists(fname)) {
+		strcpy(pfname, "_rgb.bmp");
+		if (!boost::filesystem::exists(fname)) {
+#if !DEBUG
+		  INFO(80, L"-----\n");
+		  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+		  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+		  INFO(80, L"Error: missing _rgb.*! _a.* must be paired with _rgb.*!\n");
+		  continue;
+		}
+	  }
+	  /* _a.png */
+	  strcpy(pfname, "_a.png");
+	  if ((fp = fopen(fname, "rb")) != NULL) {
+		tmptex = _txImage->readPNG(fp, &tmpwidth, &tmpheight, &tmpformat);
+		fclose(fp);
+	  }
+	  if (!tmptex) {
+		/* _a.bmp */
+		strcpy(pfname, "_a.bmp");
+		if ((fp = fopen(fname, "rb")) != NULL) {
+		  tmptex = _txImage->readBMP(fp, &tmpwidth, &tmpheight, &tmpformat);
+		  fclose(fp);
+		}
+	  }
+	  /* _rgb.png */
+	  strcpy(pfname, "_rgb.png");
+	  if ((fp = fopen(fname, "rb")) != NULL) {
+		tex = _txImage->readPNG(fp, &width, &height, &format);
+		fclose(fp);
+	  }
+	  if (!tex) {
+		/* _rgb.bmp */
+		strcpy(pfname, "_rgb.bmp");
+		if ((fp = fopen(fname, "rb")) != NULL) {
+		  tex = _txImage->readBMP(fp, &width, &height, &format);
+		  fclose(fp);
+		}
+	  }
+	  if (tmptex) {
+		/* check if _rgb.* and _a.* have matching size and format. */
+		if (!tex || width != tmpwidth || height != tmpheight ||
+			format != GR_TEXFMT_ARGB_8888 || tmpformat != GR_TEXFMT_ARGB_8888) {
+#if !DEBUG
+		  INFO(80, L"-----\n");
+		  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+		  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+		  if (!tex) {
+			INFO(80, L"Error: missing _rgb.*!\n");
+		  } else if (width != tmpwidth || height != tmpheight) {
+			INFO(80, L"Error: _rgb.* and _a.* have mismatched width or height!\n");
+		  } else if (format != GR_TEXFMT_ARGB_8888 || tmpformat != GR_TEXFMT_ARGB_8888) {
+			INFO(80, L"Error: _rgb.* or _a.* not in 32bit color!\n");
+		  }
+		  if (tex) free(tex);
+		  if (tmptex) free(tmptex);
+		  tex = NULL;
+		  tmptex = NULL;
+		  continue;
+		}
+	  }
+	  /* make adjustments */
+	  if (tex) {
+		if (tmptex) {
+		  /* merge (A)RGB and A comp */
+		  DBG_INFO(80, L"merge (A)RGB and A comp\n");
+		  int i;
+		  for (i = 0; i < height * width; i++) {
+#if 1
+			/* use R comp for alpha. this is what Rice uses. sigh... */
+			((uint32*)tex)[i] &= 0x00ffffff;
+			((uint32*)tex)[i] |= ((((uint32*)tmptex)[i] & 0x00ff0000) << 8);
+#endif
+#if 0
+			/* use libpng style grayscale conversion */
+			uint32 texel = ((uint32*)tmptex)[i];
+			uint32 acomp = (((texel >> 16) & 0xff) * 6969 +
+							((texel >>  8) & 0xff) * 23434 +
+							((texel      ) & 0xff) * 2365) / 32768;
+			((uint32*)tex)[i] = (acomp << 24) | (((uint32*)tex)[i] & 0x00ffffff);
+#endif
+#if 0
+			/* use the standard NTSC gray scale conversion */
+			uint32 texel = ((uint32*)tmptex)[i];
+			uint32 acomp = (((texel >> 16) & 0xff) * 299 +
+							((texel >>  8) & 0xff) * 587 +
+							((texel      ) & 0xff) * 114) / 1000;
+			((uint32*)tex)[i] = (acomp << 24) | (((uint32*)tex)[i] & 0x00ffffff);
+#endif
+		  }
+		  free(tmptex);
+		  tmptex = NULL;
+		} else {
+		  /* clobber A comp. never a question of alpha. only RGB used. */
+#if !DEBUG
+		  INFO(80, L"-----\n");
+		  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+		  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+		  INFO(80, L"Warning: missing _a.*! only using _rgb.*. treat as opaque texture.\n");
+		  int i;
+		  for (i = 0; i < height * width; i++) {
+			((uint32*)tex)[i] |= 0xff000000;
+		  }
+		}
+	  }
+	} else
+
+	/*
+	 * read in _all.png, _all.dds, _allciByRGBA.png, _allciByRGBA.dds
+	 * _ciByRGBA.png, _ciByRGBA.dds, _ci.bmp
+	 */
+	if (pfname == strstr(fname, "_all.png") ||
+		pfname == strstr(fname, "_all.dds") ||
+#ifdef WIN32
+		pfname == strstr(fname, "_allcibyrgba.png") ||
+		pfname == strstr(fname, "_allcibyrgba.dds") ||
+		pfname == strstr(fname, "_cibyrgba.png") ||
+		pfname == strstr(fname, "_cibyrgba.dds") ||
+#else
+		pfname == strstr(fname, "_allciByRGBA.png") ||
+		pfname == strstr(fname, "_allciByRGBA.dds") ||
+		pfname == strstr(fname, "_ciByRGBA.png") ||
+		pfname == strstr(fname, "_ciByRGBA.dds") ||
+#endif
+		pfname == strstr(fname, "_ci.bmp")) {
+	  if ((fp = fopen(fname, "rb")) != NULL) {
+		if      (strstr(fname, ".png")) tex = _txImage->readPNG(fp, &width, &height, &format);
+		else if (strstr(fname, ".dds")) tex = _txImage->readDDS(fp, &width, &height, &format);
+		else                            tex = _txImage->readBMP(fp, &width, &height, &format);
+		fclose(fp);
+	  }
+	  /* XXX: auto-adjustment of dxt dds textures unsupported for now */
+	  if (tex && strstr(fname, ".dds")) {
+		const float aspectratio = (width > height) ? (float)width/(float)height : (float)height/(float)width;
+		if (!(aspectratio == 1.0 ||
+			  aspectratio == 2.0 ||
+			  aspectratio == 4.0 ||
+			  aspectratio == 8.0)) {
+		  free(tex);
+		  tex = NULL;
+#if !DEBUG
+		  INFO(80, L"-----\n");
+		  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+		  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+		  INFO(80, L"Error: W:H aspect ratio range not 8:1 - 1:8!\n");
+		  continue;
+		}
+		if (width  != _txReSample->nextPow2(width) ||
+			height != _txReSample->nextPow2(height)) {
+		  free(tex);
+		  tex = NULL;
+#if !DEBUG
+		  INFO(80, L"-----\n");
+		  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+		  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+		  INFO(80, L"Error: not power of 2 size!\n");
+		  continue;
+		}
+	  }
+	}
+
+	/* if we do not have a texture at this point we are screwed */
+	if (!tex) {
+#if !DEBUG
+	  INFO(80, L"-----\n");
+	  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+	  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+	  INFO(80, L"Error: load failed!\n");
+	  continue;
+	}
+	DBG_INFO(80, L"read in as %d x %d gfmt:%x\n", tmpwidth, tmpheight, tmpformat);
+
+	/* check if size and format are OK */
+	if (!(format == GR_TEXFMT_ARGB_8888     ||
+		  format == GR_TEXFMT_P_8           ||
+		  format == GR_TEXFMT_ARGB_CMP_DXT1 ||
+		  format == GR_TEXFMT_ARGB_CMP_DXT3 ||
+		  format == GR_TEXFMT_ARGB_CMP_DXT5) ||
+		(width * height) < 4) { /* TxQuantize requirement: width * height must be 4 or larger. */
+	  free(tex);
+	  tex = NULL;
+#if !DEBUG
+	  INFO(80, L"-----\n");
+	  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+	  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+	  INFO(80, L"Error: not width * height > 4 or 8bit palette color or 32bpp or dxt1 or dxt3 or dxt5!\n");
+	  continue;
+	}
+
+	/* analyze and determine best format to quantize */
+	if (format == GR_TEXFMT_ARGB_8888) {
+	  int i;
+	  int alphabits = 0;
+	  int fullalpha = 0;
+	  boolean intensity = 1;
+
+	  if (!(_options & LET_TEXARTISTS_FLY)) {
+		/* HACK ALERT! */
+		/* Account for Rice's weirdness with fmt:0 siz:2 textures.
+		 * Although the conditions are relaxed with other formats,
+		 * the D3D RGBA5551 surface is used for this format in certain
+		 * cases. See Nintemod's SuperMario64 life gauge and power
+		 * meter. The same goes for fmt:2 textures. See Mollymutt's
+		 * PaperMario text. */
+		if ((fmt == 0 && siz == 2) || fmt == 2) {
+		  DBG_INFO(80, L"Remove black, white, etc borders along the alpha edges.\n");
+		  /* round A comp */
+		  for (i = 0; i < height * width; i++) {
+			uint32 texel = ((uint32*)tex)[i];
+			((uint32*)tex)[i] = ((texel & 0xff000000) == 0xff000000 ? 0xff000000 : 0) |
+								(texel & 0x00ffffff);
+		  }
+		  /* Substitute texel color with the average of the surrounding
+		   * opaque texels. This removes borders regardless of hardware
+		   * texture filtering (bilinear, etc). */
+		  int j;
+		  for (i = 0; i < height; i++) {
+			for (j = 0; j < width; j++) {
+			  uint32 texel = ((uint32*)tex)[i * width + j];
+			  if ((texel & 0xff000000) != 0xff000000) {
+				uint32 tmptexel[8];
+				uint32 k, numtexel, r, g, b;
+				numtexel = r = g = b = 0;
+				memset(&tmptexel, 0, sizeof(tmptexel));
+				if (i > 0) {
+				  tmptexel[0] = ((uint32*)tex)[(i - 1) * width + j];                        /* north */
+				  if (j > 0)         tmptexel[1] = ((uint32*)tex)[(i - 1) * width + j - 1]; /* north-west */
+				  if (j < width - 1) tmptexel[2] = ((uint32*)tex)[(i - 1) * width + j + 1]; /* north-east */
+				}
+				if (i < height - 1) {
+				  tmptexel[3] = ((uint32*)tex)[(i + 1) * width + j];                        /* south */
+				  if (j > 0)         tmptexel[4] = ((uint32*)tex)[(i + 1) * width + j - 1]; /* south-west */
+				  if (j < width - 1) tmptexel[5] = ((uint32*)tex)[(i + 1) * width + j + 1]; /* south-east */
+				}
+				if (j > 0)         tmptexel[6] = ((uint32*)tex)[i * width + j - 1]; /* west */
+				if (j < width - 1) tmptexel[7] = ((uint32*)tex)[i * width + j + 1]; /* east */
+				for (k = 0; k < 8; k++) {
+				  if ((tmptexel[k] & 0xff000000) == 0xff000000) {
+					r += ((tmptexel[k] & 0x00ff0000) >> 16);
+					g += ((tmptexel[k] & 0x0000ff00) >>  8);
+					b += ((tmptexel[k] & 0x000000ff)      );
+					numtexel++;
+				  }
+				}
+				if (numtexel) {
+				  ((uint32*)tex)[i * width + j] = ((r / numtexel) << 16) |
+												  ((g / numtexel) <<  8) |
+												  ((b / numtexel)      );
+				} else {
+				  ((uint32*)tex)[i * width + j] = texel & 0x00ffffff;
+				}
+			  }
+			}
+		  }
+		}
+	  }
+
+	  /* simple analysis of texture */
+	  for (i = 0; i < height * width; i++) {
+		uint32 texel = ((uint32*)tex)[i];
+		if (alphabits != 8) {
+#if AGGRESSIVE_QUANTIZATION
+		  if ((texel & 0xff000000) < 0x00000003) {
+			alphabits = 1;
+			fullalpha++;
+		  } else if ((texel & 0xff000000) < 0xfe000000) {
+			alphabits = 8;
+		  }
+#else
+		  if ((texel & 0xff000000) == 0x00000000) {
+			alphabits = 1;
+			fullalpha++;
+		  } else if ((texel & 0xff000000) != 0xff000000) {
+			alphabits = 8;
+		  }
+#endif
+		}
+		if (intensity) {
+		  int rcomp = (texel >> 16) & 0xff;
+		  int gcomp = (texel >>  8) & 0xff;
+		  int bcomp = (texel      ) & 0xff;
+#if AGGRESSIVE_QUANTIZATION
+		  if (abs(rcomp - gcomp) > 8 || abs(rcomp - bcomp) > 8 || abs(gcomp - bcomp) > 8) intensity = 0;
+#else
+		  if (rcomp != gcomp || rcomp != bcomp || gcomp != bcomp) intensity = 0;
+#endif
+		}
+		if (!intensity && alphabits == 8) break;
+	  }
+	  DBG_INFO(80, L"required alpha bits:%d zero acomp texels:%d rgb as intensity:%d\n", alphabits, fullalpha, intensity);
+
+	  /* preparations based on above analysis */
+#if !REDUCE_TEXTURE_FOOTPRINT
+	  if (_maxbpp < 32 || _options & (FORCE16BPP_HIRESTEX|COMPRESSION_MASK)) {
+#endif
+		if      (alphabits == 0) destformat = GR_TEXFMT_RGB_565;
+		else if (alphabits == 1) destformat = GR_TEXFMT_ARGB_1555;
+		else                     destformat = GR_TEXFMT_ARGB_8888;
+#if !REDUCE_TEXTURE_FOOTPRINT
+	  } else {
+		destformat = GR_TEXFMT_ARGB_8888;
+	  }
+#endif
+	  if (fmt == 4 && alphabits == 0) {
+		destformat = GR_TEXFMT_ARGB_8888;
+		/* Rice I format; I = (R + G + B) / 3 */
+		for (i = 0; i < height * width; i++) {
+		  uint32 texel = ((uint32*)tex)[i];
+		  uint32 icomp = (((texel >> 16) & 0xff) +
+						  ((texel >>  8) & 0xff) +
+						  ((texel      ) & 0xff)) / 3;
+		  ((uint32*)tex)[i] = (icomp << 24) | (texel & 0x00ffffff);
+		}
+	  }
+	  if (intensity) {
+		if (alphabits == 0) {
+		  if (fmt == 4) destformat = GR_TEXFMT_ALPHA_8;
+		  else          destformat = GR_TEXFMT_INTENSITY_8;
+		} else {
+		  destformat = GR_TEXFMT_ALPHA_INTENSITY_88;
+		}
+	  }
+
+	  DBG_INFO(80, L"best gfmt:%x\n", destformat);
+	}
+	/*
+	 * Rice hi-res textures: end */
+
+
+	/* XXX: only ARGB8888 for now. comeback to this later... */
+	if (format == GR_TEXFMT_ARGB_8888) {
+
+#if TEXTURE_TILING
+
+	  /* Glide64 style texture tiling */
+	  /* NOTE: narrow wide textures can be tiled into 256x256 size textures */
+
+	  /* adjust texture size to allow tiling for V1, Rush, V2, Banshee, V3 */
+	  /* NOTE: we skip this for palette textures that need minification
+	   * because it will look ugly. */
+
+	  /* minification */
+	  {
+		int ratio = 1;
+
+		/* minification to enable glide64 style texture tiling */
+		/* determine the minification ratio to tile the texture into 256x256 size */
+		if ((_options & TILE_HIRESTEX) && _maxwidth >= 256 && _maxheight >= 256) {
+		  DBG_INFO(80, L"determine minification ratio to tile\n");
+		  tmpwidth = width;
+		  tmpheight = height;
+		  if (height > 256) {
+			ratio = ((height - 1) >> 8) + 1;
+			tmpwidth = width / ratio;
+			tmpheight = height / ratio;
+			DBG_INFO(80, L"height > 256, minification ratio:%d %d x %d -> %d x %d\n",
+					 ratio, width, height, tmpwidth, tmpheight);
+		  }
+		  if (tmpwidth > 256 && (((tmpwidth - 1) >> 8) + 1) * tmpheight > 256) {
+			ratio *= ((((((tmpwidth - 1) >> 8) + 1) * tmpheight) - 1) >> 8) + 1;
+			DBG_INFO(80, L"width > 256, minification ratio:%d %d x %d -> %d x %d\n",
+					 ratio, width, height, width / ratio, height / ratio);
+		  }
+		} else {
+		  /* normal minification to fit max texture size */
+		  if (width > _maxwidth || height > _maxheight) {
+			DBG_INFO(80, L"determine minification ratio to fit max texture size\n");
+			tmpwidth = width;
+			tmpheight = height;
+			while (tmpwidth > _maxwidth) {
+			  tmpheight >>= 1;
+			  tmpwidth >>= 1;
+			  ratio <<= 1;
+			}
+			while (tmpheight > _maxheight) {
+			  tmpheight >>= 1;
+			  tmpwidth >>= 1;
+			  ratio <<= 1;
+			}
+			DBG_INFO(80, L"minification ratio:%d %d x %d -> %d x %d\n",
+					 ratio, width, height, tmpwidth, tmpheight);
+		  }
+		}
+
+		if (ratio > 1) {
+		  if (!_txReSample->minify(&tex, &width, &height, ratio)) {
+			free(tex);
+			tex = NULL;
+			DBG_INFO(80, L"Error: minification failed!\n");
+			continue;
+		  }
+		}
+	  }
+
+	  /* tiling */
+	  if ((_options & TILE_HIRESTEX) && _maxwidth >= 256 && _maxheight >= 256) {
+		boolean usetile = 0;
+
+		/* to tile or not to tile, that is the question */
+		if (width > 256 && height <= 128 && (((width - 1) >> 8) + 1) * height <= 256) {
+
+		  if (width > _maxwidth) usetile = 1;
+		  else {
+			/* tile if the tiled texture memory footprint is smaller */
+			int tilewidth  = 256;
+			int tileheight = _txReSample->nextPow2((((width - 1) >> 8) + 1) * height);
+			tmpwidth  = width;
+			tmpheight = height;
+
+			/* 3dfx Glide3 tmpheight, W:H aspect ratio range (8:1 - 1:8) */
+			if (tilewidth > (tileheight << 3)) tileheight = tilewidth >> 3;
+
+			/* HACKALERT: see TxReSample::pow2(); */
+			if      (tmpwidth  > 64) tmpwidth  -= 4;
+			else if (tmpwidth  > 16) tmpwidth  -= 2;
+			else if (tmpwidth  >  4) tmpwidth  -= 1;
+
+			if      (tmpheight > 64) tmpheight -= 4;
+			else if (tmpheight > 16) tmpheight -= 2;
+			else if (tmpheight >  4) tmpheight -= 1;
+
+			tmpwidth  = _txReSample->nextPow2(tmpwidth);
+			tmpheight = _txReSample->nextPow2(tmpheight);
+
+			/* 3dfx Glide3 tmpheight, W:H aspect ratio range (8:1 - 1:8) */
+			if (tmpwidth > tmpheight) {
+			  if (tmpwidth  > (tmpheight << 3)) tmpheight = tmpwidth  >> 3;
+			} else {
+			  if (tmpheight > (tmpwidth  << 3)) tmpwidth  = tmpheight >> 3;
+			}
+
+			usetile = (tilewidth * tileheight < tmpwidth * tmpheight);
+		  }
+
+		}
+
+		/* tile it! do the actual tiling into 256x256 size */
+		if (usetile) {
+		  DBG_INFO(80, L"Glide64 style texture tiling\n");
+
+		  int x, y, z, ratio, offset;
+		  offset = 0;
+		  ratio = ((width - 1) >> 8) + 1;
+		  tmptex = (uint8 *)malloc(_txUtil->sizeofTx(256, height * ratio, format));
+		  if (tmptex) {
+			for (x = 0; x < ratio; x++) {
+			  for (y = 0; y < height; y++) {
+				if (x < ratio - 1) {
+				  memcpy(&tmptex[offset << 2], &tex[(x * 256 + y * width) << 2], 256 << 2);
+				} else {
+				  for (z = 0; z < width - 256 * (ratio - 1); z++) {
+					((uint32*)tmptex)[offset + z] = ((uint32*)tex)[x * 256 + y * width + z];
+				  }
+				  for (; z < 256; z++) {
+					((uint32*)tmptex)[offset + z] = ((uint32*)tmptex)[offset + z - 1];
+				  }
+				}
+				offset += 256;
+			  }
+			}
+			free(tex);
+			tex = tmptex;
+			untiled_width = width;
+			untiled_height = height;
+			width = 256;
+			height *= ratio;
+			DBG_INFO(80, L"Tiled: %d x %d -> %d x %d\n", untiled_width, untiled_height, width, height);
+		  }
+		}
+	  }
+
+#else  /* TEXTURE_TILING */
+
+	  /* minification */
+	  if (width > _maxwidth || height > _maxheight) {
+		int ratio = 1;
+		if (width / _maxwidth > height / _maxheight) {
+		  ratio = (int)ceil((double)width / _maxwidth);
+		} else {
+		  ratio = (int)ceil((double)height / _maxheight);
+		}
+		if (!_txReSample->minify(&tex, &width, &height, ratio)) {
+		  free(tex);
+		  tex = NULL;
+		  DBG_INFO(80, L"Error: minification failed!\n");
+		  continue;
+		}
+	  }
+
+#endif /* TEXTURE_TILING */
+
+	  /* texture compression */
+	  if ((_options & COMPRESSION_MASK) &&
+		  (width >= 64 && height >= 64) /* Texture compression is not suitable for low pixel coarse detail
+										 * textures. The assumption here is that textures larger than 64x64
+										 * have enough detail to produce decent quality when compressed. The
+										 * down side is that narrow stripped textures that the N64 often use
+										 * for large background textures are also ignored. It would be more
+										 * reasonable if decisions are made based on fourier-transform
+										 * spectrum or RMS error.
+										 *
+										 * NOTE: texture size must be checked before expanding to pow2 size.
+										 */
+		  ) {
+		uint32 alpha = 0;
+		int dataSize = 0;
+		int compressionType = _options & COMPRESSION_MASK;
+
+#if POW2_TEXTURES
+#if (POW2_TEXTURES == 2)
+		/* 3dfx Glide3x aspect ratio (8:1 - 1:8) */
+		if (!_txReSample->nextPow2(&tex, &width , &height, 32, 1)) {
+#else
+		/* normal pow2 expansion */
+		if (!_txReSample->nextPow2(&tex, &width , &height, 32, 0)) {
+#endif
+		  free(tex);
+		  tex = NULL;
+		  DBG_INFO(80, L"Error: aspect ratio adjustment failed!\n");
+		  continue;
+		}
+#endif
+
+		switch (_options & COMPRESSION_MASK) {
+		case S3TC_COMPRESSION:
+		  switch (destformat) {
+		  case GR_TEXFMT_ARGB_8888:
+#if GLIDE64_DXTN
+		  case GR_TEXFMT_ARGB_1555: /* for ARGB1555 use DXT5 instead of DXT1 */
+#endif
+		  case GR_TEXFMT_ALPHA_INTENSITY_88:
+			dataSize = width * height;
+			break;
+#if !GLIDE64_DXTN
+		  case GR_TEXFMT_ARGB_1555:
+#endif
+		  case GR_TEXFMT_RGB_565:
+		  case GR_TEXFMT_INTENSITY_8:
+			dataSize = (width * height) >> 1;
+			break;
+		  case GR_TEXFMT_ALPHA_8: /* no size benefit with dxtn */
+			;
+		  }
+		  break;
+		case FXT1_COMPRESSION:
+		  switch (destformat) {
+		  case GR_TEXFMT_ARGB_1555:
+		  case GR_TEXFMT_RGB_565:
+		  case GR_TEXFMT_INTENSITY_8:
+			dataSize = (width * height) >> 1;
+			break;
+			/* XXX: textures that use 8bit alpha channel look bad with the current
+			 * fxt1 library, so we substitute it with dxtn for now. afaik all gfx
+			 * cards that support fxt1 also support dxtn. (3dfx and Intel) */
+		  case GR_TEXFMT_ALPHA_INTENSITY_88:
+		  case GR_TEXFMT_ARGB_8888:
+			compressionType = S3TC_COMPRESSION;
+			dataSize = width * height;
+			break;
+		  case GR_TEXFMT_ALPHA_8: /* no size benefit with dxtn */
+			;
+		  }
+		}
+		/* compress it! */
+		if (dataSize) {
+#if 0 /* TEST: dither before compression for better results with gradients */
+		  tmptex = (uint8 *)malloc(_txUtil->sizeofTx(width, height, destformat));
+		  if (tmptex) {
+			if (_txQuantize->quantize(tex, tmptex, width, height, GR_TEXFMT_ARGB_8888, destformat, 0))
+			  _txQuantize->quantize(tmptex, tex, width, height, destformat, GR_TEXFMT_ARGB_8888, 0);
+			free(tmptex);
+		  }
+#endif
+		  tmptex = (uint8 *)malloc(dataSize);
+		  if (tmptex) {
+			if (_txQuantize->compress(tex, tmptex,
+									  width, height, destformat,
+									  &tmpwidth, &tmpheight, &tmpformat,
+									  compressionType)) {
+			  free(tex);
+			  tex = tmptex;
+			  width = tmpwidth;
+			  height = tmpheight;
+			  format = destformat = tmpformat;
+			} else {
+			  free(tmptex);
+			}
+		  }
+		}
+
+	  } else {
+
+#if POW2_TEXTURES
+#if (POW2_TEXTURES == 2)
+		/* 3dfx Glide3x aspect ratio (8:1 - 1:8) */
+		if (!_txReSample->nextPow2(&tex, &width , &height, 32, 1)) {
+#else
+		/* normal pow2 expansion */
+		if (!_txReSample->nextPow2(&tex, &width , &height, 32, 0)) {
+#endif
+		  free(tex);
+		  tex = NULL;
+		  DBG_INFO(80, L"Error: aspect ratio adjustment failed!\n");
+		  continue;
+		}
+#endif
+	  }
+
+	  /* quantize */
+	  {
+		tmptex = (uint8 *)malloc(_txUtil->sizeofTx(width, height, destformat));
+		if (tmptex) {
+		  switch (destformat) {
+		  case GR_TEXFMT_ARGB_8888:
+		  case GR_TEXFMT_ARGB_4444:
+#if !REDUCE_TEXTURE_FOOTPRINT
+			if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX)
+#endif
+			  destformat = GR_TEXFMT_ARGB_4444;
+			break;
+		  case GR_TEXFMT_ARGB_1555:
+#if !REDUCE_TEXTURE_FOOTPRINT
+			if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX)
+#endif
+			  destformat = GR_TEXFMT_ARGB_1555;
+			break;
+		  case GR_TEXFMT_RGB_565:
+#if !REDUCE_TEXTURE_FOOTPRINT
+			if (_maxbpp < 32 || _options & FORCE16BPP_HIRESTEX)
+#endif
+			  destformat = GR_TEXFMT_RGB_565;
+			break;
+		  case GR_TEXFMT_ALPHA_INTENSITY_88:
+		  case GR_TEXFMT_ALPHA_INTENSITY_44:
+#if !REDUCE_TEXTURE_FOOTPRINT
+			destformat = GR_TEXFMT_ALPHA_INTENSITY_88;
+#else
+			destformat = GR_TEXFMT_ALPHA_INTENSITY_44;
+#endif
+			break;
+		  case GR_TEXFMT_ALPHA_8:
+			destformat = GR_TEXFMT_ALPHA_8; /* yes, this is correct. ALPHA_8 instead of INTENSITY_8 */
+			break;
+		  case GR_TEXFMT_INTENSITY_8:
+			destformat = GR_TEXFMT_INTENSITY_8;
+		  }
+		  if (_txQuantize->quantize(tex, tmptex, width, height, GR_TEXFMT_ARGB_8888, destformat, 0)) {
+			format = destformat;
+			free(tex);
+			tex = tmptex;
+		  }
+		}
+	  }
+
+	}
+
+
+	/* last minute validations */
+	if (!tex || !chksum || !width || !height || !format || width > _maxwidth || height > _maxheight) {
+#if !DEBUG
+	  INFO(80, L"-----\n");
+	  INFO(80, L"path: %ls\n", dir_path.string().c_str());
+	  INFO(80, L"file: %ls\n", it->path().leaf().c_str());
+#endif
+	  if (tex) {
+		free(tex);
+		tex = NULL;
+		INFO(80, L"Error: bad format or size! %d x %d gfmt:%x\n", width, height, format);
+	  } else {
+		INFO(80, L"Error: load failed!!\n");
+	  }
+	  continue;
+	}
+
+	/* load it into hires texture cache. */
+	{
+	  uint64 chksum64 = (uint64)palchksum;
+	  chksum64 <<= 32;
+	  chksum64 |= (uint64)chksum;
+
+	  GHQTexInfo tmpInfo;
+	  memset(&tmpInfo, 0, sizeof(GHQTexInfo));
+
+	  tmpInfo.data = tex;
+	  tmpInfo.width = width;
+	  tmpInfo.height = height;
+	  tmpInfo.format = format;
+	  tmpInfo.largeLodLog2 = _txUtil->grLodLog2(width, height);
+	  tmpInfo.smallLodLog2 = tmpInfo.largeLodLog2;
+	  tmpInfo.aspectRatioLog2 = _txUtil->grAspectRatioLog2(width, height);
+	  tmpInfo.is_hires_tex = 1;
+
+#if TEXTURE_TILING
+	  /* Glide64 style texture tiling. */
+	  if (untiled_width && untiled_height) {
+		tmpInfo.tiles = ((untiled_width - 1) >> 8) + 1;
+		tmpInfo.untiled_width = untiled_width;
+		tmpInfo.untiled_height = untiled_height;
+	  }
+#endif
+
+	  /* remove redundant in cache */
+	  if (replace && TxCache::del(chksum64)) {
+		DBG_INFO(80, L"removed duplicate old cache.\n");
+	  }
+
+	  /* add to cache */
+	  if (TxCache::add(chksum64, &tmpInfo)) {
+		/* Callback to display hires texture info.
+		 * Gonetz <gonetz(at)ngs.ru> */
+		if (_callback) {
+		  wchar_t tmpbuf[MAX_PATH];
+		  mbstowcs(tmpbuf, fname, MAX_PATH);
+		  (*_callback)(L"[%d] total mem:%.2fmb - %ls\n", _cache.size(), (float)_totalSize/1000000, tmpbuf);
+		}
+		DBG_INFO(80, L"texture loaded!\n");
+	  }
+	  free(tex);
+	}
+
+  }
+
+  CHDIR(curpath);
+
+  return 1;
+}
diff --git a/GLideNHQ/TxHiResCache.h b/GLideNHQ/TxHiResCache.h
new file mode 100644
index 00000000..9da88c7c
--- /dev/null
+++ b/GLideNHQ/TxHiResCache.h
@@ -0,0 +1,60 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXHIRESCACHE_H__
+#define __TXHIRESCACHE_H__
+
+/* support hires textures
+ *   0: disable
+ *   1: enable
+ */
+#define HIRES_TEXTURE 1
+
+#include "TxCache.h"
+#include "TxQuantize.h"
+#include "TxImage.h"
+#include "TxReSample.h"
+#include <boost/filesystem.hpp>
+
+class TxHiResCache : public TxCache /* TxCache specialization that loads hi-res replacement textures */
+{
+private:
+  int _maxwidth;   /* widest texture accepted by the loader; wider ones are minified or rejected */
+  int _maxheight;  /* tallest texture accepted by the loader; taller ones are minified or rejected */
+  int _maxbpp;     /* when below 32 the loader chooses 16bpp destination formats */
+  boolean _haveCache; /* NOTE(review): not used in the visible code; presumably set once a cache is available -- confirm */
+  boolean _abortLoad; /* NOTE(review): not used in the visible code; presumably asks a running load to stop -- confirm */
+  TxImage *_txImage;       /* image file decoder (readPNG / readDDS / readBMP) */
+  TxQuantize *_txQuantize; /* pixel format conversion and compression (quantize / compress) */
+  TxReSample *_txReSample; /* resizing helper (minify / nextPow2) */
+  boolean loadHiResTextures(boost::filesystem::wpath dir_path, boolean replace); /* loads texture files for dir_path into the cache; with replace set an existing entry with the same checksum is deleted first */
+public:
+  ~TxHiResCache();
+  TxHiResCache(int maxwidth, int maxheight, int maxbpp, int options, /* size/depth limits and option flags; presumably stored in the members above -- confirm */
+               const wchar_t *path, const wchar_t *ident,            /* NOTE(review): presumably the texture pack location and game identifier -- confirm */
+               dispInfoFuncExt callback);                            /* NOTE(review): presumably the progress callback invoked after each texture is cached -- confirm */
+  boolean empty(); /* NOTE(review): presumably reports whether no hi-res texture is cached -- confirm */
+  boolean load(boolean replace); /* NOTE(review): presumably the public entry point that drives loadHiResTextures() -- confirm */
+};
+
+#endif /* __TXHIRESCACHE_H__ */
diff --git a/GLideNHQ/TxImage.cpp b/GLideNHQ/TxImage.cpp
new file mode 100644
index 00000000..871a004c
--- /dev/null
+++ b/GLideNHQ/TxImage.cpp
@@ -0,0 +1,799 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* use power of 2 texture size
+ * (0:disable, 1:enable, 2:3dfx) */
+#define POW2_TEXTURES 0
+
+/* check 8 bytes. use a larger value if needed. */
+#define PNG_CHK_BYTES 8
+
+#include "TxImage.h"
+#include "TxReSample.h"
+#include "TxDbg.h"
+#include <stdlib.h>
+
+boolean
+TxImage::getPNGInfo(FILE *fp, png_structp *png_ptr, png_infop *info_ptr)
+{
+  /* Check the PNG signature of fp, create the libpng read/info structs and
+   * read the file information that precedes the image data. Returns 1 with
+   * both structs ready, or 0 with any struct created here destroyed again. */
+  unsigned char header[PNG_CHK_BYTES];
+
+  /* need a stream that starts with a complete PNG signature */
+  if (fp == NULL ||
+      fread(header, 1, PNG_CHK_BYTES, fp) != PNG_CHK_BYTES ||
+      png_sig_cmp(header, 0, PNG_CHK_BYTES) != 0)
+    return 0;
+
+  /* NULL callbacks select libpng's default error and warning handlers */
+  *png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (*png_ptr == NULL)
+    return 0;
+
+  *info_ptr = png_create_info_struct(*png_ptr);
+  if (*info_ptr == NULL) {
+    png_destroy_read_struct(png_ptr, NULL, NULL);
+    return 0;
+  }
+
+  /* errors raised by the libpng calls below longjmp() back to here */
+  if (setjmp(png_jmpbuf(*png_ptr))) {
+    DBG_INFO(80, L"error reading png!\n");
+    png_destroy_read_struct(png_ptr, info_ptr, NULL);
+    return 0;
+  }
+
+  /* the signature bytes were already consumed by the fread() above */
+  png_init_io(*png_ptr, fp);
+  png_set_sig_bytes(*png_ptr, PNG_CHK_BYTES);
+  png_read_info(*png_ptr, *info_ptr);
+
+  return 1;
+}
+
+uint8*
+TxImage::readPNG(FILE* fp, int* width, int* height, uint16* format)
+{
+  /* Decode a PNG file into a malloc()ed 32bpp buffer (GR_TEXFMT_ARGB_8888).
+   * On success *width, *height and *format describe the returned buffer,
+   * which the caller must free(). On failure NULL is returned and all
+   * three outputs are left at 0. */
+
+  png_structp png_ptr;
+  png_infop info_ptr;
+  uint8 * volatile image = NULL; /* volatile: read again after longjmp() */
+  png_uint_32 o_width = 0, o_height = 0; /* the type png_get_IHDR() writes */
+  int bit_depth, color_type, interlace_type, compression_type, filter_type,
+      row_bytes, num_pas;
+
+  /* initialize */
+  *width  = 0;
+  *height = 0;
+  *format = 0;
+
+  /* check if we have a valid png file */
+  if (!fp)
+    return NULL;
+
+  if (!getPNGInfo(fp, &png_ptr, &info_ptr)) {
+    INFO(80, L"error reading png file! png image is corrupt.\n");
+    return NULL;
+  }
+
+  /* The jump buffer armed inside getPNGInfo() refers to a stack frame that
+   * has already returned, so re-arm it here before making any further
+   * libpng calls that may report an error through longjmp(). */
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    DBG_INFO(80, L"error reading png!\n");
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    free(image);
+    return NULL;
+  }
+
+  png_get_IHDR(png_ptr, info_ptr, &o_width, &o_height, &bit_depth, &color_type,
+               &interlace_type, &compression_type, &filter_type);
+
+  DBG_INFO(80, L"png format %d x %d bitdepth:%d color:%x interlace:%x compression:%x filter:%x\n",
+           (int)o_width, (int)o_height, bit_depth, color_type,
+           interlace_type, compression_type, filter_type);
+
+  /* transformations for Rice hi-res textures: _all.png, _rgb.png, _a.png,
+   * _ciByRGBA.png, _allciByRGBA.png */
+
+  /* strip if color channel is larger than 8 bits */
+  if (bit_depth > 8) {
+    png_set_strip_16(png_ptr);
+    bit_depth = 8;
+  }
+
+  /* The next three steps are not really required per Rice format spec,
+   * but are done just in case someone uses them. */
+  /* convert palette color to rgb color */
+  if (color_type == PNG_COLOR_TYPE_PALETTE) {
+    png_set_palette_to_rgb(png_ptr);
+    color_type = PNG_COLOR_TYPE_RGB;
+  }
+
+  /* expand 1,2,4 bit gray scale to 8 bit gray scale */
+  if (color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8)
+    png_set_expand_gray_1_2_4_to_8(png_ptr);
+
+  /* convert gray scale or gray scale + alpha to rgb color */
+  if (color_type == PNG_COLOR_TYPE_GRAY ||
+      color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
+    png_set_gray_to_rgb(png_ptr);
+    color_type = PNG_COLOR_TYPE_RGB;
+  }
+
+  /* add alpha channel if any */
+  if (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) {
+    png_set_tRNS_to_alpha(png_ptr);
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+  }
+
+  /* convert rgb to rgba */
+  if (color_type == PNG_COLOR_TYPE_RGB) {
+    png_set_filler(png_ptr, 0xff, PNG_FILLER_AFTER);
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+  }
+
+  /* punt invalid formats */
+  if (color_type != PNG_COLOR_TYPE_RGB_ALPHA) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    DBG_INFO(80, L"Error: not PNG_COLOR_TYPE_RGB_ALPHA format!\n");
+    return NULL;
+  }
+
+  /* convert rgba to bgra */
+  png_set_bgr(png_ptr);
+
+  /* turn on interlace handling to cope with the weirdness
+   * of texture authors using interlaced format */
+  num_pas = png_set_interlace_handling(png_ptr);
+
+  /* update info structure */
+  png_read_update_info(png_ptr, info_ptr);
+
+  /* we only get here if ARGB8888 */
+  row_bytes = (int)png_get_rowbytes(png_ptr, info_ptr);
+
+  /* allocate memory to read in image; size computed in size_t, not int */
+  image = (uint8*)malloc((size_t)row_bytes * o_height);
+
+  /* read in image */
+  if (image) {
+    int pas;
+    png_uint_32 i;
+    uint8* tmpimage;
+
+    for (pas = 0; pas < num_pas; pas++) { /* deal with interlacing */
+      tmpimage = image;
+      for (i = 0; i < o_height; i++) {
+        /* copy row */
+        png_read_rows(png_ptr, &tmpimage, NULL, 1);
+        tmpimage += row_bytes;
+      }
+    }
+
+    /* read rest of the info structure */
+    png_read_end(png_ptr, info_ptr);
+
+    *width = (row_bytes >> 2);
+    *height = (int)o_height;
+    *format = GR_TEXFMT_ARGB_8888;
+
+#if POW2_TEXTURES
+    /* next power of 2 size conversions */
+    /* NOTE: I can do this in the above loop for faster operations, but some
+     * texture packs require a workaround. see HACKALERT in nextPow2().
+     */
+
+    TxReSample *txReSample = new TxReSample; // XXX: temporary. move to a better place.
+    uint8 *pixels = image; /* nextPow2() takes a plain uint8** */
+
+#if (POW2_TEXTURES == 2)
+    if (!txReSample->nextPow2(&pixels, width, height, 32, 1)) {
+#else
+    if (!txReSample->nextPow2(&pixels, width, height, 32, 0)) {
+#endif
+      free(pixels);
+      pixels = NULL;
+      *width = 0;
+      *height = 0;
+      *format = 0;
+    }
+    image = pixels;
+
+    delete txReSample;
+
+#endif /* POW2_TEXTURES */
+  }
+
+  /* clean up */
+  png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+
+#ifdef DEBUG
+  if (!image) {
+    DBG_INFO(80, L"Error: failed to load png image!\n");
+  }
+#endif
+
+  return image;
+}
+
+boolean
+TxImage::writePNG(uint8* src, FILE* fp, int width, int height, int rowStride, uint16 format, uint8 *palette)
+{
+  /* Write a texture to an already opened file as an 8 bit per channel PNG.
+   *
+   * src        first row of pixel data; it is handed to libpng as is, no
+   *            conversion from the N64 layout is done here (see TODO below)
+   * fp         destination stream; it is left open for the caller
+   * width      image width in pixels
+   * height     image height in pixels, i.e. the number of rows written
+   * rowStride  distance in bytes from one row of src to the next
+   * format     N64 texture format, (Format << 8 | Size); see below
+   * palette    unused: writing color index (CI) formats is not implemented
+   *
+   * Returns 1 on success. Returns 0 for a NULL src or fp, an unsupported
+   * format or any error raised by libpng.
+   */
+
+  png_structp png_ptr;
+  png_infop info_ptr;
+  /* zeroed so members a format does not set still hold a defined value */
+  png_color_8 sig_bit = { 0, 0, 0, 0, 0 };
+  int bit_depth, color_type;
+  int i;
+
+  (void)palette; /* CI output is not implemented, so it is not consulted */
+
+  /* nothing to write from or to */
+  if (!src || !fp)
+    return 0;
+
+  /* NULL callbacks select libpng's default error and warning handlers */
+  png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (png_ptr == NULL)
+    return 0;
+
+  info_ptr = png_create_info_struct(png_ptr);
+  if (info_ptr == NULL) {
+    png_destroy_write_struct(&png_ptr, NULL);
+    return 0;
+  }
+
+  /* libpng reports errors by longjmp()ing back to this point. The buffer
+   * is fetched with png_jmpbuf(), as getPNGInfo() does; the jmpbuf member
+   * of png_struct cannot be accessed directly with libpng 1.5 and later. */
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+    return 0;
+  }
+
+  /* libpng writes to fp through stdio */
+  png_init_io(png_ptr, fp);
+
+  /* TODO: images must be converted to RGBA8888 or CI8,
+   * palettes need to be separated to A and RGB. */
+
+  /* N64 formats
+   * Format: 0 - RGBA, 1 - YUV, 2 - CI, 3 - IA, 4 - I
+   * Size:   0 - 4bit, 1 - 8bit, 2 - 16bit, 3 - 32 bit
+   * format = (Format << 8 | Size);
+   */
+
+  /* each channel is saved in 8bits for consistency */
+  switch (format) {
+  case 0x0002:/* RGBA5551 */
+    bit_depth = 8;
+    sig_bit.red   = 5;
+    sig_bit.green = 5;
+    sig_bit.blue  = 5;
+    sig_bit.alpha = 1;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0003:/* RGBA8888 */
+  case 0x0302:/* IA88 */
+    bit_depth = 8;
+    sig_bit.red   = 8;
+    sig_bit.green = 8;
+    sig_bit.blue  = 8;
+    sig_bit.alpha = 8;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0300:/* IA31 */
+    bit_depth = 8;
+    sig_bit.red   = 3;
+    sig_bit.green = 3;
+    sig_bit.blue  = 3;
+    sig_bit.alpha = 1;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0301:/* IA44 */
+    bit_depth = 8;
+    sig_bit.red   = 4;
+    sig_bit.green = 4;
+    sig_bit.blue  = 4;
+    sig_bit.alpha = 4;
+    color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+    break;
+  case 0x0400:/* I4 */
+    bit_depth = 8;
+    sig_bit.red   = 4;
+    sig_bit.green = 4;
+    sig_bit.blue  = 4;
+    color_type = PNG_COLOR_TYPE_RGB;
+    break;
+  case 0x0401:/* I8 */
+  case 0x0402:/* I16 */
+    bit_depth = 8;
+    sig_bit.red   = 8;
+    sig_bit.green = 8;
+    sig_bit.blue  = 8;
+    color_type = PNG_COLOR_TYPE_RGB;
+    break;
+  case 0x0200:/* CI4 */
+  case 0x0201:/* CI8 */
+    /* No palette (PLTE) or transparency (tRNS) table is ever built from
+     * `palette`, so there is nothing valid to hand to libpng yet. Treat
+     * CI formats as unsupported instead of passing uninitialized
+     * pointers to png_set_PLTE() and png_set_tRNS(). */
+  case 0x0102:/* YUV ? */
+  case 0x0103:
+  default:
+    /* unsupported format */
+    png_destroy_write_struct(&png_ptr, &info_ptr);
+    return 0;
+  }
+
+  /* Only PNG_COLOR_TYPE_RGB and PNG_COLOR_TYPE_RGB_ALPHA get this far.
+   * png_set_bgr() declares the rows as blue-green-red ordered and
+   * png_set_sBIT() records how many bits per channel are significant. */
+  png_set_bgr(png_ptr);
+  png_set_sBIT(png_ptr, info_ptr, &sig_bit);
+
+  /* 8 bits per channel, no interlacing, default compression and filtering */
+  png_set_IHDR(png_ptr, info_ptr, width, height,
+               bit_depth, color_type, PNG_INTERLACE_NONE,
+               PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
+
+  /* header first, then the rows one at a time (src advances by rowStride,
+   * so the stride chosen by the caller is honoured), then the trailer */
+  png_write_info(png_ptr, info_ptr);
+  for (i = 0; i < height; i++) {
+    png_write_row(png_ptr, (png_bytep)src);
+    src += rowStride;
+  }
+  png_write_end(png_ptr, info_ptr);
+
+  /* release the libpng write state */
+  png_destroy_write_struct(&png_ptr, &info_ptr);
+
+  return 1;
+}
+
+/*
+ * Read the BITMAPFILEHEADER and BITMAPINFOHEADER of a Windows bitmap from
+ * the current position of fp, one field at a time.
+ *
+ * fp       : open file positioned at the start of the bitmap
+ * bmp_fhdr : receives the file header fields
+ * bmp_ihdr : receives the info header fields
+ *
+ * Returns 1 on success. Returns 0 when a field cannot be read, when the
+ * file does not start with "BM", or when the info header size is not 40.
+ */
+boolean
+TxImage::getBMPInfo(FILE* fp, BITMAPFILEHEADER* bmp_fhdr, BITMAPINFOHEADER* bmp_ihdr)
+{
+  /* Every field below is filled by a fixed-size fread() of 2 or 4 bytes.
+   * Clear both structs first so that no stale bytes survive in a member
+   * that is wider than its on-disk size (e.g. an 8 byte `unsigned long`);
+   * otherwise comparisons such as biSize != 40 see garbage.
+   */
+  memset(bmp_fhdr, 0, sizeof(*bmp_fhdr));
+  memset(bmp_ihdr, 0, sizeof(*bmp_ihdr));
+
+  /*
+   * read in BITMAPFILEHEADER
+   */
+
+  /* is this a BMP file? */
+  if (fread(&bmp_fhdr->bfType, 2, 1, fp) != 1)
+    return 0;
+
+  if (memcmp(&bmp_fhdr->bfType, "BM", 2) != 0)
+    return 0;
+
+  /* get file size */
+  if (fread(&bmp_fhdr->bfSize, 4, 1, fp) != 1)
+    return 0;
+
+  /* reserved 1 */
+  if (fread(&bmp_fhdr->bfReserved1, 2, 1, fp) != 1)
+    return 0;
+
+  /* reserved 2 */
+  if (fread(&bmp_fhdr->bfReserved2, 2, 1, fp) != 1)
+    return 0;
+
+  /* offset to the image data */
+  if (fread(&bmp_fhdr->bfOffBits, 4, 1, fp) != 1)
+    return 0;
+
+  /*
+   * read in BITMAPINFOHEADER
+   */
+
+  /* size of BITMAPINFOHEADER */
+  if (fread(&bmp_ihdr->biSize, 4, 1, fp) != 1)
+    return 0;
+
+  /* is this a Windows BMP? */
+  if (bmp_ihdr->biSize != 40)
+    return 0;
+
+  /* width of the bitmap in pixels */
+  if (fread(&bmp_ihdr->biWidth, 4, 1, fp) != 1)
+    return 0;
+
+  /* height of the bitmap in pixels */
+  if (fread(&bmp_ihdr->biHeight, 4, 1, fp) != 1)
+    return 0;
+
+  /* number of planes (always 1) */
+  if (fread(&bmp_ihdr->biPlanes, 2, 1, fp) != 1)
+    return 0;
+
+  /* number of bits-per-pixel. (1, 4, 8, 16, 24, 32) */
+  if (fread(&bmp_ihdr->biBitCount, 2, 1, fp) != 1)
+    return 0;
+
+  /* compression for a compressed bottom-up bitmap
+   *   0 : uncompressed format
+   *   1 : run-length encoded 4 bpp format
+   *   2 : run-length encoded 8 bpp format
+   *   3 : bitfield
+   */
+  if (fread(&bmp_ihdr->biCompression, 4, 1, fp) != 1)
+    return 0;
+
+  /* size of the image in bytes */
+  if (fread(&bmp_ihdr->biSizeImage, 4, 1, fp) != 1)
+    return 0;
+
+  /* horizontal resolution in pixels-per-meter */
+  if (fread(&bmp_ihdr->biXPelsPerMeter, 4, 1, fp) != 1)
+    return 0;
+
+  /* vertical resolution in pixels-per-meter */
+  if (fread(&bmp_ihdr->biYPelsPerMeter, 4, 1, fp) != 1)
+    return 0;
+
+  /* number of color indexes in the color table that are actually used */
+  if (fread(&bmp_ihdr->biClrUsed, 4, 1, fp) != 1)
+    return 0;
+
+  /*  the number of color indexes that are required for displaying */
+  if (fread(&bmp_ihdr->biClrImportant, 4, 1, fp) != 1)
+    return 0;
+
+  return 1;
+}
+
+/*
+ * Read one bitmap scanline of row_bytes bytes located at absolute file
+ * offset pos into row. Whatever cannot be read (seek failure, truncated
+ * file) is zero-filled so the caller never sees uninitialized memory.
+ */
+static void
+readBMPScanline(FILE* fp, long pos, uint8* row, int row_bytes)
+{
+  size_t got = 0;
+
+  if (fseek(fp, pos, SEEK_SET) == 0)
+    got = fread(row, 1, (size_t)row_bytes, fp);
+
+  if (got < (size_t)row_bytes)
+    memset(row + got, 0, (size_t)row_bytes - got);
+}
+
+/*
+ * Load an uncompressed 4, 8, 24 or 32 bit Windows bitmap.
+ *
+ * fp     : open bitmap file (NULL is rejected)
+ * width  : receives the width in pixels of the returned image
+ * height : receives the height in pixels of the returned image
+ * format : receives the texture format of the returned image
+ *
+ * Returns a malloc()ed image with the rows flipped to top-down order, or
+ * NULL on failure; on failure before an image exists width, height and
+ * format are left at 0.
+ */
+uint8*
+TxImage::readBMP(FILE* fp, int* width, int* height, uint16* format)
+{
+  /* NOTE: returned image format;
+   *       4, 8bit palette bmp -> GR_TEXFMT_P_8
+   *       24, 32bit bmp -> GR_TEXFMT_ARGB_8888
+   */
+
+  uint8 *image = NULL;
+  uint8 *image_row = NULL;
+  uint8 *tmpimage = NULL;
+  int row_bytes, pos, i, j;
+  /* Windows Bitmap */
+  BITMAPFILEHEADER bmp_fhdr;
+  BITMAPINFOHEADER bmp_ihdr;
+
+  /* initialize */
+  *width  = 0;
+  *height = 0;
+  *format = 0;
+
+  /* check if we have a valid bmp file */
+  if (!fp)
+    return NULL;
+
+  if (!getBMPInfo(fp, &bmp_fhdr, &bmp_ihdr)) {
+    INFO(80, L"error reading bitmap file! bitmap image is corrupt.\n");
+    return NULL;
+  }
+
+  DBG_INFO(80, L"bmp format %d x %d bitdepth:%d compression:%x offset:%d\n",
+           bmp_ihdr.biWidth, bmp_ihdr.biHeight, bmp_ihdr.biBitCount,
+           bmp_ihdr.biCompression, bmp_fhdr.bfOffBits);
+
+  /* Rice hi-res textures.
+   * Only uncompressed bottom-up bitmaps with a positive size are handled;
+   * a zero or negative dimension would otherwise feed malloc() a bogus size.
+   */
+  if (!(bmp_ihdr.biBitCount == 8 || bmp_ihdr.biBitCount == 4 || bmp_ihdr.biBitCount == 32 || bmp_ihdr.biBitCount == 24) ||
+      bmp_ihdr.biCompression != 0 ||
+      bmp_ihdr.biWidth <= 0 || bmp_ihdr.biHeight <= 0) {
+    DBG_INFO(80, L"Error: incompatible bitmap format!\n");
+    return NULL;
+  }
+
+  /* rowStride in bytes: round the row up to whole bytes first (a 4bpp row
+   * with an odd pixel count ends in a half-used byte) and then up to the
+   * 4 byte boundary every bitmap scanline is aligned to.
+   */
+  row_bytes = (int)(((bmp_ihdr.biWidth * bmp_ihdr.biBitCount + 31) >> 5) << 2);
+
+  switch (bmp_ihdr.biBitCount) {
+  case 8:
+  case 32:
+    /* 8 bit, 32 bit bitmap */
+    image = (uint8*)malloc(row_bytes * bmp_ihdr.biHeight);
+    if (image) {
+      tmpimage = image;
+      /* the file stores the bottom row first; start at the last stored row */
+      pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1);
+      for (i = 0; i < bmp_ihdr.biHeight; i++) {
+        /* read in image */
+        readBMPScanline(fp, pos, tmpimage, row_bytes);
+        tmpimage += row_bytes;
+        pos -= row_bytes;
+      }
+    }
+    break;
+  case 4:
+    /* 4bit bitmap */
+    image = (uint8*)malloc((row_bytes * bmp_ihdr.biHeight) << 1);
+    image_row = (uint8*)malloc(row_bytes);
+    if (image && image_row) {
+      tmpimage = image;
+      pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1);
+      for (i = 0; i < bmp_ihdr.biHeight; i++) {
+        /* read in image */
+        readBMPScanline(fp, pos, image_row, row_bytes);
+        /* expand 4bpp to 8bpp. stuff 4bit values into 8bit comps. */
+        for (j = 0; j < row_bytes; j++) {
+          tmpimage[j << 1] = image_row[j] & 0x0f;
+          tmpimage[(j << 1) + 1] = (image_row[j] & 0xf0) >> 4;
+        }
+        tmpimage += (row_bytes << 1);
+        pos -= row_bytes;
+      }
+      free(image_row);
+    } else {
+      if (image_row) free(image_row);
+      if (image) free(image);
+      image = NULL;
+    }
+    break;
+  case 24:
+    /* 24 bit bitmap */
+    image = (uint8*)malloc((bmp_ihdr.biWidth * bmp_ihdr.biHeight) << 2);
+    image_row = (uint8*)malloc(row_bytes);
+    if (image && image_row) {
+      tmpimage = image;
+      pos = bmp_fhdr.bfOffBits + row_bytes * (bmp_ihdr.biHeight - 1);
+      for (i = 0; i < bmp_ihdr.biHeight; i++) {
+        /* read in image */
+        readBMPScanline(fp, pos, image_row, row_bytes);
+        /* convert 24bpp to 32bpp. */
+        for (j = 0; j < bmp_ihdr.biWidth; j++) {
+          tmpimage[(j << 2)]     = image_row[j * 3];
+          tmpimage[(j << 2) + 1] = image_row[j * 3 + 1];
+          tmpimage[(j << 2) + 2] = image_row[j * 3 + 2];
+          tmpimage[(j << 2) + 3] = 0xFF;
+        }
+        tmpimage += (bmp_ihdr.biWidth << 2);
+        pos -= row_bytes;
+      }
+      free(image_row);
+    } else {
+      if (image_row) free(image_row);
+      if (image) free(image);
+      image = NULL;
+    }
+  }
+
+  if (image) {
+    if (bmp_ihdr.biBitCount == 24) {
+      /* the 24bpp path packs exactly biWidth pixels per output row, so the
+       * padded stride must not be used to derive the width (a 3 pixel wide
+       * bitmap has a 12 byte stride, which would report 4 pixels).
+       */
+      *width = bmp_ihdr.biWidth;
+    } else {
+      /* the other paths keep the row padding in the returned image */
+      *width = (row_bytes << 3) / bmp_ihdr.biBitCount;
+    }
+    *height = bmp_ihdr.biHeight;
+
+    switch (bmp_ihdr.biBitCount) {
+    case 8:
+    case 4:
+      *format = GR_TEXFMT_P_8;
+      break;
+    case 32:
+    case 24:
+      *format = GR_TEXFMT_ARGB_8888;
+    }
+
+#if POW2_TEXTURES
+    /* next power of 2 size conversions */
+    /* NOTE: I can do this in the above loop for faster operations, but some
+     * texture packs require a workaround. see HACKALERT in nextPow2().
+     */
+
+    /* must be a pointer: it is created with new, used with -> and deleted */
+    TxReSample *txReSample = new TxReSample; // XXX: temporary. move to a better place.
+
+#if (POW2_TEXTURES == 2)
+    if (!txReSample->nextPow2(&image, width, height, 8, 1)) {
+#else
+    if (!txReSample->nextPow2(&image, width, height, 8, 0)) {
+#endif
+      if (image) {
+        free(image);
+        image = NULL;
+      }
+      *width = 0;
+      *height = 0;
+      *format = 0;
+    }
+
+    delete txReSample;
+
+#endif /* POW2_TEXTURES */
+  }
+
+#ifdef DEBUG
+  if (!image) {
+    DBG_INFO(80, L"Error: failed to load bmp image!\n");
+  }
+#endif
+
+  return image;
+}
+
+/*
+ * Read the DDS file header from the current position of fp, one field at
+ * a time.
+ *
+ * fp       : open file positioned at the start of the dds file
+ * dds_fhdr : receives the header fields
+ *
+ * Returns 1 on success. Returns 0 when a field cannot be read or when the
+ * file does not start with the "DDS " magic.
+ */
+boolean
+TxImage::getDDSInfo(FILE *fp, DDSFILEHEADER *dds_fhdr)
+{
+  /* Every field below is filled by a fixed-size fread() of 4 bytes (44 for
+   * the reserved array). Clear the struct first so that no stale bytes
+   * survive in a member that is wider than its on-disk size (e.g. an
+   * 8 byte `unsigned long`); otherwise flags and dimensions carry garbage.
+   */
+  memset(dds_fhdr, 0, sizeof(*dds_fhdr));
+
+  /*
+   * read in DDSFILEHEADER
+   */
+
+  /* is this a DDS file? */
+  if (fread(&dds_fhdr->dwMagic, 4, 1, fp) != 1)
+    return 0;
+
+  if (memcmp(&dds_fhdr->dwMagic, "DDS ", 4) != 0)
+    return 0;
+
+  if (fread(&dds_fhdr->dwSize, 4, 1, fp) != 1)
+    return 0;
+
+  /* get file flags */
+  if (fread(&dds_fhdr->dwFlags, 4, 1, fp) != 1)
+    return 0;
+
+  /* height of dds in pixels */
+  if (fread(&dds_fhdr->dwHeight, 4, 1, fp) != 1)
+    return 0;
+
+  /* width of dds in pixels */
+  if (fread(&dds_fhdr->dwWidth, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwLinearSize, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwDepth, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwMipMapCount, 4, 1, fp) != 1)
+    return 0;
+
+  /* 11 reserved 32bit words, read as one 44 byte chunk */
+  if (fread(&dds_fhdr->dwReserved1, 4 * 11, 1, fp) != 1)
+    return 0;
+
+  /* pixel format description */
+  if (fread(&dds_fhdr->ddpf.dwSize, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwFlags, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwFourCC, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwRGBBitCount, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwRBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwGBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwBBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->ddpf.dwRGBAlphaBitMask, 4, 1, fp) != 1)
+    return 0;
+
+  /* capability bits */
+  if (fread(&dds_fhdr->dwCaps1, 4, 1, fp) != 1)
+    return 0;
+
+  if (fread(&dds_fhdr->dwCaps2, 4, 1, fp) != 1)
+    return 0;
+
+  return 1;
+}
+
+/*
+ * Load a single-level DXT1, DXT3 or DXT5 compressed DDS texture.
+ *
+ * fp     : open dds file (NULL is rejected)
+ * width  : receives the texture width in pixels
+ * height : receives the texture height in pixels
+ * format : receives GR_TEXFMT_ARGB_CMP_DXT1 / _DXT3 / _DXT5
+ *
+ * Returns a malloc()ed buffer holding the still-compressed texture data,
+ * or NULL on failure, in which case width, height and format are 0.
+ */
+uint8*
+TxImage::readDDS(FILE* fp, int* width, int* height, uint16* format)
+{
+  uint8 *image = NULL;
+  DDSFILEHEADER dds_fhdr;
+  uint16 tmpformat = 0;
+  /* bytes per compressed 4x4 texel block */
+  unsigned int block_bytes = 0;
+
+  /* initialize */
+  *width  = 0;
+  *height = 0;
+  *format = 0;
+
+  /* check if we have a valid dds file */
+  if (!fp)
+    return NULL;
+
+  if (!getDDSInfo(fp, &dds_fhdr)) {
+    INFO(80, L"error reading dds file! dds image is corrupt.\n");
+    return NULL;
+  }
+
+  DBG_INFO(80, L"dds format %d x %d HeaderSize %d LinearSize %d\n",
+           dds_fhdr.dwWidth, dds_fhdr.dwHeight, dds_fhdr.dwSize, dds_fhdr.dwLinearSize);
+
+  /* rejected only when none of these header flags is set */
+  if (!(dds_fhdr.dwFlags & (DDSD_CAPS|DDSD_WIDTH|DDSD_HEIGHT|DDSD_PIXELFORMAT|DDSD_LINEARSIZE))) {
+    DBG_INFO(80, L"Error: incompatible dds format!\n");
+    return NULL;
+  }
+
+  /* an empty texture has no data to load */
+  if (dds_fhdr.dwWidth == 0 || dds_fhdr.dwHeight == 0) {
+    DBG_INFO(80, L"Error: incompatible dds format!\n");
+    return NULL;
+  }
+
+  if ((dds_fhdr.dwFlags & DDSD_MIPMAPCOUNT) && dds_fhdr.dwMipMapCount != 1) {
+    DBG_INFO(80, L"Error: mipmapped dds not supported!\n");
+    return NULL;
+  }
+
+  if (!((dds_fhdr.ddpf.dwFlags & DDPF_FOURCC) && dds_fhdr.dwCaps2 == 0)) {
+    DBG_INFO(80, L"Error: not fourcc standard texture!\n");
+    return NULL;
+  }
+
+  if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT1", 4) == 0) {
+    DBG_INFO(80, L"DXT1 format\n");
+    block_bytes = 8;
+    tmpformat = GR_TEXFMT_ARGB_CMP_DXT1;
+  } else if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT3", 4) == 0) {
+    DBG_INFO(80, L"DXT3 format\n");
+    block_bytes = 16;
+    tmpformat = GR_TEXFMT_ARGB_CMP_DXT3;
+  } else if (memcmp(&dds_fhdr.ddpf.dwFourCC, "DXT5", 4) == 0) {
+    DBG_INFO(80, L"DXT5 format\n");
+    block_bytes = 16;
+    tmpformat = GR_TEXFMT_ARGB_CMP_DXT5;
+  } else {
+    DBG_INFO(80, L"Error: not DXT1 or DXT3 or DXT5 format!\n");
+    return NULL;
+  }
+
+  /* compensate for missing LinearSize.
+   * DXTn stores whole 4x4 texel blocks, so round both dimensions up to a
+   * multiple of 4. For dimensions that already are multiples of 4 this
+   * equals (w * h) / 2 for DXT1 and w * h for DXT3/DXT5.
+   */
+  dds_fhdr.dwLinearSize = ((dds_fhdr.dwWidth + 3) >> 2) *
+                          ((dds_fhdr.dwHeight + 3) >> 2) * block_bytes;
+
+  /* read in image */
+  image = (uint8*)malloc(dds_fhdr.dwLinearSize);
+  if (image) {
+    /* size of header is 128 bytes */
+    if (fseek(fp, 128, SEEK_SET) != 0 ||
+        fread(image, dds_fhdr.dwLinearSize, 1, fp) != 1) {
+      /* truncated file: do not hand out a partly uninitialized buffer */
+      INFO(80, L"error reading dds file! dds image is corrupt.\n");
+      free(image);
+      return NULL;
+    }
+
+    *width  = dds_fhdr.dwWidth;
+    *height = dds_fhdr.dwHeight;
+    *format = tmpformat;
+  }
+
+  return image;
+}
diff --git a/GLideNHQ/TxImage.h b/GLideNHQ/TxImage.h
new file mode 100644
index 00000000..bf69c31b
--- /dev/null
+++ b/GLideNHQ/TxImage.h
@@ -0,0 +1,116 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXIMAGE_H__
+#define __TXIMAGE_H__
+
+#include <stdio.h>
+#include <png.h>
+#include "TxInternal.h"
+
+/*
+ * Windows bitmap headers.
+ * On Windows the definitions come from the platform headers. Elsewhere
+ * they are declared here with explicit 16/32 bit members: the file format
+ * uses 32 bit DWORD/LONG fields, whereas `long` is 64 bit on LP64 targets
+ * and would not match the 2 and 4 byte reads that fill these structs.
+ */
+#ifndef WIN32
+typedef struct tagBITMAPFILEHEADER {
+  unsigned short bfType;          /* file magic, "BM" */
+  unsigned int   bfSize;          /* file size */
+  unsigned short bfReserved1;
+  unsigned short bfReserved2;
+  unsigned int   bfOffBits;       /* offset to the image data */
+} BITMAPFILEHEADER;
+
+typedef struct tagBITMAPINFOHEADER {
+  unsigned int   biSize;          /* size of this header, 40 for a Windows BMP */
+  int            biWidth;         /* width in pixels */
+  int            biHeight;        /* height in pixels */
+  unsigned short biPlanes;        /* number of planes (always 1) */
+  unsigned short biBitCount;      /* bits per pixel */
+  unsigned int   biCompression;   /* 0 : uncompressed */
+  unsigned int   biSizeImage;     /* size of the image in bytes */
+  int            biXPelsPerMeter; /* horizontal resolution */
+  int            biYPelsPerMeter; /* vertical resolution */
+  unsigned int   biClrUsed;       /* color table entries actually used */
+  unsigned int   biClrImportant;  /* color table entries required for display */
+} BITMAPINFOHEADER;
+#else
+typedef struct tagBITMAPFILEHEADER BITMAPFILEHEADER;
+typedef struct tagBITMAPINFOHEADER BITMAPINFOHEADER;
+#endif
+
+/* bits of DDSFILEHEADER::dwFlags: which header fields are meaningful */
+#define DDSD_CAPS	0x00000001
+#define DDSD_HEIGHT	0x00000002
+#define DDSD_WIDTH	0x00000004
+#define DDSD_PITCH	0x00000008
+#define DDSD_PIXELFORMAT	0x00001000
+#define DDSD_MIPMAPCOUNT	0x00020000
+#define DDSD_LINEARSIZE	0x00080000
+#define DDSD_DEPTH	0x00800000
+
+/* bits of DDSPIXELFORMAT::dwFlags; DDPF_FOURCC marks a fourcc (DXTn) texture */
+#define DDPF_ALPHAPIXELS	0x00000001
+#define DDPF_FOURCC	0x00000004
+#define DDPF_RGB	0x00000040
+
+/* surface capability bits -- presumably for DDSFILEHEADER::dwCaps1; TODO confirm */
+#define DDSCAPS_COMPLEX	0x00000008
+#define DDSCAPS_TEXTURE	0x00001000
+#define DDSCAPS_MIPMAP	0x00400000
+
+/*
+ * DDS header structures.
+ * All members are 32 bit DWORDs in the file and are filled by 4 byte
+ * reads, so they are declared `unsigned int`; `unsigned long` is 64 bit
+ * on LP64 targets and would leave half of every member unwritten.
+ */
+
+/* pixel format description embedded in the dds header */
+typedef struct tagDDSPIXELFORMAT {
+  unsigned int dwSize;
+  unsigned int dwFlags;            /* DDPF_* bits */
+  unsigned int dwFourCC;           /* "DXT1", "DXT3", "DXT5", ... */
+  unsigned int dwRGBBitCount;
+  unsigned int dwRBitMask;
+  unsigned int dwGBitMask;
+  unsigned int dwBBitMask;
+  unsigned int dwRGBAlphaBitMask;
+} DDSPIXELFORMAT;
+
+/* dds file header including the leading magic word; 128 bytes in the file */
+typedef struct tagDDSFILEHEADER {
+  unsigned int dwMagic;            /* "DDS " */
+  unsigned int dwSize;
+  unsigned int dwFlags;            /* DDSD_* bits */
+  unsigned int dwHeight;           /* height in pixels */
+  unsigned int dwWidth;            /* width in pixels */
+  unsigned int dwLinearSize;
+  unsigned int dwDepth;
+  unsigned int dwMipMapCount;
+  unsigned int dwReserved1[11];
+  DDSPIXELFORMAT ddpf;
+  unsigned int dwCaps1;
+  unsigned int dwCaps2;
+} DDSFILEHEADER;
+
+/*
+ * Image file reader/writer for texture data.
+ * The read functions return a malloc()ed buffer (NULL on failure) and
+ * report the image size and texture format through their out parameters.
+ */
+class TxImage
+{
+private:
+  /* header probe for png files (implementation not shown here) */
+  boolean getPNGInfo(FILE *fp, png_structp *png_ptr, png_infop *info_ptr);
+  /* reads both bitmap headers field by field; returns 0 on a short read,
+   * a missing "BM" magic or an info header size other than 40 */
+  boolean getBMPInfo(FILE *fp, BITMAPFILEHEADER *bmp_fhdr, BITMAPINFOHEADER *bmp_ihdr);
+  /* reads the dds header field by field; returns 0 on a short read or a
+   * missing "DDS " magic */
+  boolean getDDSInfo(FILE *fp, DDSFILEHEADER *dds_fhdr);
+public:
+  TxImage() {}
+  ~TxImage() {}
+  /* png loader -- presumably mirrors readBMP/readDDS; TODO confirm */
+  uint8* readPNG(FILE* fp, int* width, int* height, uint16* format);
+  /* writes src as a png to fp; returns 1 on success and 0 for an
+   * unsupported format */
+  boolean writePNG(uint8* src, FILE* fp, int width, int height, int rowStride, uint16 format, uint8 *palette);
+  /* loads an uncompressed 4/8 bit (-> GR_TEXFMT_P_8) or 24/32 bit
+   * (-> GR_TEXFMT_ARGB_8888) bitmap */
+  uint8* readBMP(FILE* fp, int* width, int* height, uint16* format);
+  /* loads a DXT1/DXT3/DXT5 dds file without mipmaps; the returned data
+   * stays compressed */
+  uint8* readDDS(FILE* fp, int* width, int* height, uint16* format);
+};
+
+#endif /* __TXIMAGE_H__ */
diff --git a/GLideNHQ/TxInternal.h b/GLideNHQ/TxInternal.h
new file mode 100644
index 00000000..3f0be6d9
--- /dev/null
+++ b/GLideNHQ/TxInternal.h
@@ -0,0 +1,100 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __INTERNAL_H__
+#define __INTERNAL_H__
+
+#include "Ext_TxFilter.h"
+
+/* dll exports
+ * TAPI expands to __declspec(dllexport) only when TXFILTER_DLL is defined
+ * and to nothing otherwise. TAPIENTRY is empty in both configurations.
+ */
+#ifdef TXFILTER_DLL
+#define TAPI __declspec(dllexport)
+#define TAPIENTRY
+#else
+#define TAPI
+#define TAPIENTRY
+#endif
+
+/* fixed-width unsigned types.
+ * uint32 must be exactly 32 bits: the texel conversion code walks packed
+ * texture data through uint32 pointers. `unsigned long` is 64 bits wide on
+ * LP64 targets (64bit Linux, macOS); `unsigned int` is 32 bits on every
+ * platform this library targets.
+ */
+typedef unsigned char  uint8;
+typedef unsigned short uint16;
+typedef unsigned int   uint32;
+
+/* KBHIT(key): non-zero when GetAsyncKeyState() reports both bit 15 and
+ * bit 0 for the key (presumably "held down" and "pressed since the last
+ * query"; see the Win32 documentation). Always 0 on non-Windows builds.
+ */
+#ifdef WIN32
+#define KBHIT(key) ((GetAsyncKeyState(key) & 0x8001) == 0x8001)
+#else
+#define KBHIT(key) (0)
+#endif
+
+/* from OpenGL glext.h */
+#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT   0x83F0
+#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT  0x83F1
+#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT  0x83F2
+#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT  0x83F3
+
+/* for explicit fxt1 compression */
+#define CC_CHROMA 0x0
+#define CC_HI     0x1
+#define CC_ALPHA  0x2
+
+/* in-memory zlib texture compression
+ * (0x8000 lies above every GR_TEXFMT_* value listed in this header)
+ */
+#define GR_TEXFMT_GZ                 0x8000
+
+/* Reference table only: the block below is compiled out. The GR_TEXFMT_*
+ * names used by the code must therefore be defined elsewhere (presumably
+ * Ext_TxFilter.h -- TODO confirm).
+ */
+#if 0 /* this is here to remind me of other formats */
+/* from 3Dfx Interactive Inc. glide.h */
+#define GR_TEXFMT_8BIT                  0x0
+#define GR_TEXFMT_RGB_332               GR_TEXFMT_8BIT
+#define GR_TEXFMT_YIQ_422               0x1
+#define GR_TEXFMT_ALPHA_8               0x2 /* (0..0xFF) alpha     */
+#define GR_TEXFMT_INTENSITY_8           0x3 /* (0..0xFF) intensity */
+#define GR_TEXFMT_ALPHA_INTENSITY_44    0x4
+#define GR_TEXFMT_P_8                   0x5 /* 8-bit palette */
+#define GR_TEXFMT_RSVD0                 0x6 /* GR_TEXFMT_P_8_RGBA */
+#define GR_TEXFMT_P_8_6666              GR_TEXFMT_RSVD0
+#define GR_TEXFMT_P_8_6666_EXT          GR_TEXFMT_RSVD0
+#define GR_TEXFMT_RSVD1                 0x7
+#define GR_TEXFMT_16BIT                 0x8
+#define GR_TEXFMT_ARGB_8332             GR_TEXFMT_16BIT
+#define GR_TEXFMT_AYIQ_8422             0x9
+#define GR_TEXFMT_RGB_565               0xa
+#define GR_TEXFMT_ARGB_1555             0xb
+#define GR_TEXFMT_ARGB_4444             0xc
+#define GR_TEXFMT_ALPHA_INTENSITY_88    0xd
+#define GR_TEXFMT_AP_88                 0xe /* 8-bit alpha 8-bit palette */
+#define GR_TEXFMT_RSVD2                 0xf
+#define GR_TEXFMT_RSVD4                 GR_TEXFMT_RSVD2
+
+/* from 3Dfx Interactive Inc. g3ext.h */
+#define GR_TEXFMT_ARGB_CMP_FXT1        0x11
+#define GR_TEXFMT_ARGB_8888            0x12
+#define GR_TEXFMT_YUYV_422             0x13
+#define GR_TEXFMT_UYVY_422             0x14
+#define GR_TEXFMT_AYUV_444             0x15
+#define GR_TEXFMT_ARGB_CMP_DXT1        0x16
+#define GR_TEXFMT_ARGB_CMP_DXT2        0x17
+#define GR_TEXFMT_ARGB_CMP_DXT3        0x18
+#define GR_TEXFMT_ARGB_CMP_DXT4        0x19
+#define GR_TEXFMT_ARGB_CMP_DXT5        0x1A
+#define GR_TEXTFMT_RGB_888             0xFF
+#endif
+
+#endif /* __INTERNAL_H__ */
diff --git a/GLideNHQ/TxQuantize.cpp b/GLideNHQ/TxQuantize.cpp
new file mode 100644
index 00000000..30959192
--- /dev/null
+++ b/GLideNHQ/TxQuantize.cpp
@@ -0,0 +1,2394 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __MSC__
+#pragma warning(disable: 4786)
+#endif
+
+/* NOTE: This code is not optimized. It can be made faster. */
+
+#include "TxQuantize.h"
+#include <boost/thread.hpp>
+#include <boost/bind.hpp>
+
+/*
+ * Creates the TxUtil helper owned by this object, caches the processor
+ * count it reports and fetches the fxt1/dxtn compression entry points
+ * from the TxLoadLib singleton.
+ */
+TxQuantize::TxQuantize()
+{
+  /* owned by this object; released in the destructor */
+  _txUtil = new TxUtil();
+
+  /* get number of CPU cores. */
+  _numcore = _txUtil->getNumberofProcessors();
+
+  /* get fxt1 and dxtn compression functions */
+  _tx_compress_fxt1 = TxLoadLib::getInstance()->getfxtCompressTexFuncExt();
+  _tx_compress_dxtn = TxLoadLib::getInstance()->getdxtCompressTexFuncExt();
+}
+
+
+/* Releases the TxUtil helper allocated by the constructor. */
+TxQuantize::~TxQuantize()
+{
+  delete _txUtil;
+}
+
+/*
+ * Expand 16bit ARGB1555 texels to 32bit ARGB8888.
+ *
+ * src    : packed texels, two per 32bit word; bits 0..15 of a word hold
+ *          the texel that is written first
+ * dest   : receives one 32bit word per texel
+ * width,
+ * height : texture size in texels; (width * height) / 2 source words are
+ *          converted, so a trailing odd texel is not processed
+ *
+ * The alpha bit becomes 0x00 or 0xff. Each 5bit channel is shifted to the
+ * top of its byte and its three most significant bits are repeated in the
+ * low bits.
+ */
+void
+TxQuantize::ARGB1555_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  const int pairs = (width * height) >> 1;
+
+  for (int n = 0; n < pairs; ++n) {
+    uint32 texel;
+
+    /* texel in the low half of the word */
+    texel  = (src[n] & 0x00008000) ? 0xff000000 : 0x00000000;             /* alpha */
+    texel |= ((src[n] & 0x00007c00) << 9) | ((src[n] & 0x00007000) << 4); /* red   */
+    texel |= ((src[n] & 0x000003e0) << 6) | ((src[n] & 0x00000380) << 1); /* green */
+    texel |= ((src[n] & 0x0000001f) << 3) | ((src[n] & 0x0000001c) >> 2); /* blue  */
+    dest[2 * n] = texel;
+
+    /* texel in the high half of the word */
+    texel  = (src[n] & 0x80000000) ? 0xff000000 : 0x00000000;               /* alpha */
+    texel |= ((src[n] & 0x7c000000) >>  7) | ((src[n] & 0x70000000) >> 12); /* red   */
+    texel |= ((src[n] & 0x03e00000) >> 10) | ((src[n] & 0x03800000) >> 15); /* green */
+    texel |= ((src[n] & 0x001f0000) >> 13) | ((src[n] & 0x001c0000) >> 18); /* blue  */
+    dest[2 * n + 1] = texel;
+  }
+}
+
+/*
+ * Expand 16bit ARGB4444 texels to 32bit ARGB8888.
+ *
+ * src    : packed texels, two per 32bit word; bits 0..15 of a word hold
+ *          the texel that is written first
+ * dest   : receives one 32bit word per texel
+ * width,
+ * height : texture size in texels; (width * height) / 2 source words are
+ *          converted, so a trailing odd texel is not processed
+ *
+ * Every 4bit channel is duplicated into both nibbles of its output byte.
+ */
+void
+TxQuantize::ARGB4444_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  const int pairs = (width * height) >> 1;
+
+  for (int n = 0; n < pairs; ++n) {
+    uint32 nibbles;
+
+    /* low half: spread the channels into the low nibble of each byte ... */
+    nibbles = ((src[n] & 0x0000f000) << 12) |
+              ((src[n] & 0x00000f00) << 8) |
+              ((src[n] & 0x000000f0) << 4) |
+               (src[n] & 0x0000000f);
+    /* ... then copy every nibble into the high nibble next to it */
+    dest[2 * n] = nibbles | (nibbles << 4);
+
+    /* high half: spread the channels into the high nibble of each byte ... */
+    nibbles = ((src[n] & 0xf0000000) |
+              ((src[n] & 0x0f000000) >> 4) |
+              ((src[n] & 0x00f00000) >> 8) |
+              ((src[n] & 0x000f0000) >> 12));
+    /* ... then copy every nibble into the low nibble next to it */
+    dest[2 * n + 1] = nibbles | (nibbles >> 4);
+  }
+}
+
+/*
+ * Expand 16bit RGB565 texels to 32bit ARGB8888 with alpha forced to 0xff.
+ *
+ * src    : packed texels, two per 32bit word; bits 0..15 of a word hold
+ *          the texel that is written first
+ * dest   : receives one 32bit word per texel
+ * width,
+ * height : texture size in texels; (width * height) / 2 source words are
+ *          converted, so a trailing odd texel is not processed
+ *
+ * Each channel is shifted to the top of its byte and its most significant
+ * bits (three for red/blue, two for green) are repeated in the low bits.
+ */
+void
+TxQuantize::RGB565_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  const int pairs = (width * height) >> 1;
+
+  for (int n = 0; n < pairs; ++n) {
+    uint32 texel;
+
+    /* texel in the low half of the word */
+    texel  = 0xff000000;                                                  /* alpha */
+    texel |= ((src[n] & 0x0000f800) << 8) | ((src[n] & 0x0000e000) << 3); /* red   */
+    texel |= ((src[n] & 0x000007e0) << 5) | ((src[n] & 0x00000600) >> 1); /* green */
+    texel |= ((src[n] & 0x0000001f) << 3) | ((src[n] & 0x0000001c) >> 2); /* blue  */
+    dest[2 * n] = texel;
+
+    /* texel in the high half of the word */
+    texel  = 0xff000000;                                                    /* alpha */
+    texel |= ((src[n] & 0xf8000000) >>  8) | ((src[n] & 0xe0000000) >> 13); /* red   */
+    texel |= ((src[n] & 0x07e00000) >> 11) | ((src[n] & 0x06000000) >> 17); /* green */
+    texel |= ((src[n] & 0x001f0000) >> 13) | ((src[n] & 0x001c0000) >> 18); /* blue  */
+    dest[2 * n + 1] = texel;
+  }
+}
+
+void
+TxQuantize::A8_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  /* Widen 8-bit texels to 32 bits by replicating the byte into all four
+   * channel positions.  Each source word carries four texels, lowest byte
+   * first, so (width * height) >> 2 words are read and four output words
+   * are written per source word.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int wordCount = (width * height) >> 2;
+
+  for (int w = 0; w < wordCount; ++w) {
+    const uint32 packed = src[w];
+    uint32* const out = dest + w * 4;
+
+    for (int k = 0; k < 4; ++k) {
+      /* 00000000 00000000 00000000 aaaaaaaa */
+      const uint32 a = (packed >> (k * 8)) & 0xff;
+      /* aaaaaaaa aaaaaaaa aaaaaaaa aaaaaaaa */
+      out[k] = a | (a << 8) | (a << 16) | (a << 24);
+    }
+  }
+}
+
+void
+TxQuantize::AI44_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  /* Expand "aaaaiiii" bytes into 32-bit words laid out as
+   * aaaaaaaa iiiiiiii iiiiiiii iiiiiiii.  Both nibbles are widened to
+   * eight bits by duplicating the nibble.  Four texels are packed per
+   * source word (lowest byte first); (width * height) >> 2 words are read.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int wordCount = (width * height) >> 2;
+
+  for (int w = 0; w < wordCount; ++w) {
+    const uint32 packed = src[w];
+    uint32* const out = dest + w * 4;
+
+    for (int k = 0; k < 4; ++k) {
+      const uint32 texel = (packed >> (k * 8)) & 0xff;
+
+      uint32 inten = texel & 0x0f;   /* 0000iiii */
+      inten |= inten << 4;           /* iiiiiiii */
+
+      uint32 alpha = texel & 0xf0;   /* aaaa0000 */
+      alpha |= alpha >> 4;           /* aaaaaaaa */
+
+      out[k] = (alpha << 24) | (inten << 16) | (inten << 8) | inten;
+    }
+  }
+}
+
+void
+TxQuantize::AI88_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  /* Expand 16-bit "aaaaaaaa iiiiiiii" texels into 32-bit words laid out as
+   * aaaaaaaa iiiiiiii iiiiiiii iiiiiiii.  Two texels are packed per source
+   * word (low half first); (width * height) >> 1 words are read.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int wordCount = (width * height) >> 1;
+
+  for (int w = 0; w < wordCount; ++w) {
+    const uint32 packed = src[w];
+    uint32* const out = dest + w * 2;
+
+    for (int k = 0; k < 2; ++k) {
+      const uint32 texel = (packed >> (k * 16)) & 0xffff;
+      const uint32 inten = texel & 0x00ff;          /* iiiiiiii          */
+      const uint32 alpha = (texel & 0xff00) << 16;  /* aaaaaaaa 0 0 0    */
+
+      out[k] = alpha | (inten << 16) | (inten << 8) | inten;
+    }
+  }
+}
+
+void
+TxQuantize::ARGB8888_ARGB1555(uint32* src, uint32* dest, int width, int height)
+{
+  /* Pack pairs of 32-bit ARGB texels into one word holding two 16-bit
+   * "arrrrrgg gggbbbbb" texels (first texel in the low half).  The single
+   * alpha bit is set whenever the source alpha byte is non-zero; colour
+   * channels keep their five most significant bits.
+   * (width * height) >> 1 output words are written.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int pairCount = (width * height) >> 1;
+
+  for (int p = 0; p < pairCount; ++p) {
+    uint32 pair = 0;
+
+    for (int k = 0; k < 2; ++k) {
+      const uint32 argb = src[p * 2 + k];
+
+      uint32 texel = ((argb >> 9) & 0x7c00) |   /* 0rrrrr00 00000000 */
+                     ((argb >> 6) & 0x03e0) |   /* 000000gg ggg00000 */
+                     ((argb >> 3) & 0x001f);    /* 00000000 000bbbbb */
+      if (argb & 0xff000000)
+        texel |= 0x8000;                        /* a0000000 00000000 */
+
+      pair |= texel << (k * 16);
+    }
+
+    dest[p] = pair;
+  }
+}
+
+void
+TxQuantize::ARGB8888_ARGB4444(uint32* src, uint32* dest, int width, int height)
+{
+  /* Pack pairs of 32-bit ARGB texels into one word holding two 16-bit
+   * "aaaarrrr ggggbbbb" texels (first texel in the low half).  Every
+   * channel keeps its four most significant bits.
+   * (width * height) >> 1 output words are written.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int pairCount = (width * height) >> 1;
+
+  for (int p = 0; p < pairCount; ++p) {
+    uint32 pair = 0;
+
+    for (int k = 0; k < 2; ++k) {
+      const uint32 argb = src[p * 2 + k];
+      const uint32 texel = ((argb >> 16) & 0xf000) |  /* aaaa0000 00000000 */
+                           ((argb >> 12) & 0x0f00) |  /* 0000rrrr 00000000 */
+                           ((argb >>  8) & 0x00f0) |  /* 00000000 gggg0000 */
+                           ((argb >>  4) & 0x000f);   /* 00000000 0000bbbb */
+
+      pair |= texel << (k * 16);
+    }
+
+    dest[p] = pair;
+  }
+}
+
+void
+TxQuantize::ARGB8888_RGB565(uint32* src, uint32* dest, int width, int height)
+{
+  /* Pack pairs of 32-bit ARGB texels into one word holding two 16-bit
+   * "rrrrrggg gggbbbbb" texels (first texel in the low half).  Alpha is
+   * discarded; red and blue keep five bits, green keeps six.
+   * (width * height) >> 1 output words are written.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int pairCount = (width * height) >> 1;
+
+  for (int p = 0; p < pairCount; ++p) {
+    uint32 pair = 0;
+
+    for (int k = 0; k < 2; ++k) {
+      const uint32 argb = src[p * 2 + k];
+      const uint32 texel = ((argb >> 8) & 0xf800) |  /* rrrrr000 00000000 */
+                           ((argb >> 5) & 0x07e0) |  /* 00000ggg ggg00000 */
+                           ((argb >> 3) & 0x001f);   /* 00000000 000bbbbb */
+
+      pair |= texel << (k * 16);
+    }
+
+    dest[p] = pair;
+  }
+}
+
+void
+TxQuantize::ARGB8888_A8(uint32* src, uint32* dest, int width, int height)
+{
+  /* Reduce 32-bit texels to 8 bits and pack four of them per output word
+   * (first texel in the lowest byte).  The byte taken from each source
+   * texel is bits 15..8, i.e. the green channel of an ARGB8888 value.
+   * (width * height) >> 2 output words are written.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int quadCount = (width * height) >> 2;
+
+  for (int q = 0; q < quadCount; ++q) {
+    uint32 quad = 0;
+
+    for (int k = 0; k < 4; ++k) {
+      const uint32 sample = (src[q * 4 + k] >> 8) & 0xff;
+      quad |= sample << (k * 8);
+    }
+
+    dest[q] = quad;
+  }
+}
+
+void
+TxQuantize::ARGB8888_AI44(uint32* src, uint32* dest, int width, int height)
+{
+  /* Reduce 32-bit texels to 8-bit "aaaaiiii" and pack four of them per
+   * output word (first texel in the lowest byte).  Alpha comes from the
+   * top four bits of the alpha byte; intensity comes from the top four
+   * bits of bits 15..8 (the green channel of an ARGB8888 value).
+   * (width * height) >> 2 output words are written.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int quadCount = (width * height) >> 2;
+
+  for (int q = 0; q < quadCount; ++q) {
+    uint32 quad = 0;
+
+    for (int k = 0; k < 4; ++k) {
+      const uint32 argb = src[q * 4 + k];
+      const uint32 texel = ((argb >> 24) & 0xf0) |  /* aaaa0000 */
+                           ((argb >> 12) & 0x0f);   /* 0000iiii */
+
+      quad |= texel << (k * 8);
+    }
+
+    dest[q] = quad;
+  }
+}
+
+void
+TxQuantize::ARGB8888_AI88(uint32* src, uint32* dest, int width, int height)
+{
+  /* Reduce 32-bit texels to 16-bit "aaaaaaaa iiiiiiii" and pack two of
+   * them per output word (first texel in the low half).  Alpha is the
+   * source alpha byte; intensity is bits 15..8 (the green channel of an
+   * ARGB8888 value).  (width * height) >> 1 output words are written.
+   * NOTE(review): src and dest are assumed not to overlap. */
+  const int pairCount = (width * height) >> 1;
+
+  for (int p = 0; p < pairCount; ++p) {
+    uint32 pair = 0;
+
+    for (int k = 0; k < 2; ++k) {
+      const uint32 argb = src[p * 2 + k];
+      const uint32 texel = ((argb >> 16) & 0xff00) |  /* aaaaaaaa 00000000 */
+                           ((argb >>  8) & 0x00ff);   /* 00000000 iiiiiiii */
+
+      pair |= texel << (k * 16);
+    }
+
+    dest[p] = pair;
+  }
+}
+
+/* R.W. Floyd and L. Steinberg, An adaptive algorithm
+ * for spatial grey scale, Proceedings of the Society
+ * of Information Display 17, pp75-77, 1976
+ */
+void
+TxQuantize::ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* Floyd-Steinberg error-diffusion halftoning.
+   *
+   * Reads width * height 32-bit ARGB texels from src and writes the same
+   * number of 16-bit RGB565 texels to dst (alpha is dropped).  All
+   * channel arithmetic is fixed point, scaled by 10000, so a full-scale
+   * channel value is 255 * 10000 = 2550000.
+   */
+
+  /* nothing to convert; also keeps new[] away from a non-positive size */
+  if (width <= 0 || height <= 0) return;
+
+  int x, y;
+  int qr = 0, qg = 0, qb = 0; /* quantized values, then the quantization error */
+  int ir, ig, ib;             /* incoming values */
+  int t;
+
+  /* per-column error handed down to the next row; () zero-initializes */
+  int *errR = new int[width]();
+  int *errG = new int[width]();
+  int *errB = new int[width]();
+
+  uint16 *dest = (uint16 *)dst;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* incoming pixel values */
+      ir = ((*src >> 16) & 0xFF) * 10000;
+      ig = ((*src >>  8) & 0xFF) * 10000;
+      ib = ((*src      ) & 0xFF) * 10000;
+
+      /* no pixel to the left at the start of a row */
+      if (x == 0) qr = qg = qb = 0;
+
+      /* quantize pixel values.
+       * qr * 0.4375 is the error from the pixel to the left (the EAST
+       * share computed in the previous iteration),
+       * errR is the error from the pixel to the top, top left, and top right */
+      ir += errR[x] + qr * 4375 / 10000;
+      ig += errG[x] + qg * 4375 / 10000;
+      ib += errB[x] + qb * 4375 / 10000;
+
+      /* error distribution to the SOUTH-EAST of the previous pixel.
+       * it cannot be stored in the previous iteration because errX[x]
+       * still held the value consumed by the quantization above */
+      errR[x] = qr * 625 / 10000;
+      errG[x] = qg * 625 / 10000;
+      errB[x] = qb * 625 / 10000;
+
+      qr = ir;
+      qg = ig;
+      qb = ib;
+
+      /* clamp */
+      if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
+      if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
+      if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
+
+      /* convert to RGB565 */
+      qr = qr * 0x1F / 2550000;
+      qg = qg * 0x3F / 2550000;
+      qb = qb * 0x1F / 2550000;
+
+      /* this is the dithered pixel */
+      t  = (qr << 11) | (qg << 5) | qb;
+
+      /* compute the errors: expand the quantized value back to 8 bits
+       * and subtract it from the (unclamped) target value */
+      qr = ((qr << 3) | (qr >> 2)) * 10000;
+      qg = ((qg << 2) | (qg >> 4)) * 10000;
+      qb = ((qb << 3) | (qb >> 2)) * 10000;
+      qr = ir - qr;
+      qg = ig - qg;
+      qb = ib - qb;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: every column except the first has a left neighbour
+       * (x > 0, so column 1 feeds column 0 as well) */
+      if (x > 0) {
+        errR[x - 1] += qr * 1875 / 10000;
+        errG[x - 1] += qg * 1875 / 10000;
+        errB[x - 1] += qb * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errR[x] += qr * 3125 / 10000;
+      errG[x] += qg * 3125 / 10000;
+      errB[x] += qb * 3125 / 10000;
+
+      *dest = (t & 0xFFFF);
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errR;
+  delete [] errG;
+  delete [] errB;
+}
+
+
+void
+TxQuantize::ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* Floyd-Steinberg error-diffusion halftoning.
+   *
+   * Reads width * height 32-bit ARGB texels from src and writes the same
+   * number of 16-bit ARGB1555 texels to dst.  Only the colour channels
+   * are dithered; the alpha bit is set whenever the source alpha byte is
+   * non-zero.  All channel arithmetic is fixed point, scaled by 10000,
+   * so a full-scale channel value is 255 * 10000 = 2550000.
+   */
+
+  /* nothing to convert; also keeps new[] away from a non-positive size */
+  if (width <= 0 || height <= 0) return;
+
+  int x, y;
+  int qr = 0, qg = 0, qb = 0; /* quantized values, then the quantization error */
+  int ir, ig, ib;             /* incoming values */
+  int t;
+
+  /* per-column error handed down to the next row; () zero-initializes */
+  int *errR = new int[width]();
+  int *errG = new int[width]();
+  int *errB = new int[width]();
+
+  uint16 *dest = (uint16 *)dst;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* incoming pixel values */
+      ir = ((*src >> 16) & 0xFF) * 10000;
+      ig = ((*src >>  8) & 0xFF) * 10000;
+      ib = ((*src      ) & 0xFF) * 10000;
+
+      /* no pixel to the left at the start of a row */
+      if (x == 0) qr = qg = qb = 0;
+
+      /* quantize pixel values.
+       * qr * 0.4375 is the error from the pixel to the left (the EAST
+       * share computed in the previous iteration),
+       * errR is the error from the pixel to the top, top left, and top right */
+      ir += errR[x] + qr * 4375 / 10000;
+      ig += errG[x] + qg * 4375 / 10000;
+      ib += errB[x] + qb * 4375 / 10000;
+
+      /* error distribution to the SOUTH-EAST of the previous pixel.
+       * it cannot be stored in the previous iteration because errX[x]
+       * still held the value consumed by the quantization above */
+      errR[x] = qr * 625 / 10000;
+      errG[x] = qg * 625 / 10000;
+      errB[x] = qb * 625 / 10000;
+
+      qr = ir;
+      qg = ig;
+      qb = ib;
+
+      /* clamp */
+      if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
+      if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
+      if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
+
+      /* convert to RGB555 */
+      qr = qr * 0x1F / 2550000;
+      qg = qg * 0x1F / 2550000;
+      qb = qb * 0x1F / 2550000;
+
+      /* this is the dithered pixel */
+      t  = (qr << 10) | (qg << 5) | qb;
+      t |= ((*src >> 24) ? 0x8000 : 0);
+
+      /* compute the errors: expand the quantized value back to 8 bits
+       * and subtract it from the (unclamped) target value */
+      qr = ((qr << 3) | (qr >> 2)) * 10000;
+      qg = ((qg << 3) | (qg >> 2)) * 10000;
+      qb = ((qb << 3) | (qb >> 2)) * 10000;
+      qr = ir - qr;
+      qg = ig - qg;
+      qb = ib - qb;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: every column except the first has a left neighbour
+       * (x > 0, so column 1 feeds column 0 as well) */
+      if (x > 0) {
+        errR[x - 1] += qr * 1875 / 10000;
+        errG[x - 1] += qg * 1875 / 10000;
+        errB[x - 1] += qb * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errR[x] += qr * 3125 / 10000;
+      errG[x] += qg * 3125 / 10000;
+      errB[x] += qb * 3125 / 10000;
+
+      *dest = (t & 0xFFFF);
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errR;
+  delete [] errG;
+  delete [] errB;
+}
+
+/* Quantize a width x height ARGB8888 image to ARGB4444 using
+ * Floyd-Steinberg error diffusion.
+ *
+ * src    : source texels, one uint32 each (A 24-31, R 16-23, G 8-15, B 0-7).
+ * dst    : destination, written as one uint16 per texel
+ *          (A 12-15, R 8-11, G 4-7, B 0-3).
+ * width  : texels per row.
+ * height : number of rows.
+ *
+ * Channel values are kept as fixed point scaled by 10000, so an 8-bit
+ * channel spans 0..2550000.
+ */
+void
+TxQuantize::ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* NOTE: alpha dithering looks better for alpha gradients, but is prone
+   * to producing noisy speckles for constant or step level alpha. Output
+   * results should always be checked.
+   */
+  boolean ditherAlpha = 0;
+
+  /* nothing to convert; also keeps new[] from seeing a negative length */
+  if (width <= 0 || height <= 0) return;
+
+  int i, x, y;
+  int qr, qg, qb, qa; /* quantized incoming values, later the errors */
+  int ir, ig, ib, ia; /* incoming values */
+  int t;
+  /* per-column error carried from the current row to the row below */
+  int *errR = new int[width];
+  int *errG = new int[width];
+  int *errB = new int[width];
+  int *errA = new int[width];
+
+  uint16 *dest = (uint16 *)dst;
+
+  for (i = 0; i < width; i++) errR[i] = errG[i] = errB[i] = errA[i] = 0;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* incoming pixel values */
+      ir = ((*src >> 16) & 0xFF) * 10000;
+      ig = ((*src >>  8) & 0xFF) * 10000;
+      ib = ((*src      ) & 0xFF) * 10000;
+      ia = ((*src >> 24) & 0xFF) * 10000;
+
+      /* the first texel of a row has no neighbour to the left */
+      if (x == 0) qr = qg = qb = qa = 0;
+
+      /* quantize pixel values.
+       * qr * 0.4375 is the error from the pixel to the left (the EAST
+       * share computed in the previous iteration),
+       * errR is the error from the row above (top, top left, top right) */
+      ir += errR[x] + qr * 4375 / 10000;
+      ig += errG[x] + qg * 4375 / 10000;
+      ib += errB[x] + qb * 4375 / 10000;
+      ia += errA[x] + qa * 4375 / 10000;
+
+      /* error distribution to the SOUTH-EAST of the previous iteration.
+       * it cannot be stored in the previous iteration because errX[x]
+       * was still needed by the quantization above */
+      errR[x] = qr * 625 / 10000;
+      errG[x] = qg * 625 / 10000;
+      errB[x] = qb * 625 / 10000;
+      errA[x] = qa * 625 / 10000;
+
+      qr = ir;
+      qg = ig;
+      qb = ib;
+      qa = ia;
+
+      /* clamp */
+      if (qr < 0) qr = 0; else if (qr > 2550000) qr = 2550000;
+      if (qg < 0) qg = 0; else if (qg > 2550000) qg = 2550000;
+      if (qb < 0) qb = 0; else if (qb > 2550000) qb = 2550000;
+      if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
+
+      /* convert to 4 bits per channel */
+      qr = qr * 0xF / 2550000;
+      qg = qg * 0xF / 2550000;
+      qb = qb * 0xF / 2550000;
+      qa = qa * 0xF / 2550000;
+
+      /* this is the value to be returned */
+      if (ditherAlpha) {
+        t = (qa << 12) | (qr <<  8) | (qg << 4) | qb;
+      } else {
+        /* undithered alpha: take the top 4 bits of the source alpha */
+        t = (qr <<  8) | (qg << 4) | qb;
+        t |= (*src >> 16) & 0xF000;
+      }
+
+      /* compute the errors: expand the 4-bit result back to 8 bits by
+       * nibble replication and subtract it from the wanted value */
+      qr = ((qr << 4) | qr) * 10000;
+      qg = ((qg << 4) | qg) * 10000;
+      qb = ((qb << 4) | qb) * 10000;
+      qa = ((qa << 4) | qa) * 10000;
+      qr = ir - qr;
+      qg = ig - qg;
+      qb = ib - qb;
+      qa = ia - qa;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: errX[x - 1] has already been consumed for this row,
+       * so it now accumulates error for the row below. Every column but
+       * the first has a south-west neighbour. */
+      if (x > 0) {
+        errR[x - 1] += qr * 1875 / 10000;
+        errG[x - 1] += qg * 1875 / 10000;
+        errB[x - 1] += qb * 1875 / 10000;
+        errA[x - 1] += qa * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errR[x] += qr * 3125 / 10000;
+      errG[x] += qg * 3125 / 10000;
+      errB[x] += qb * 3125 / 10000;
+      errA[x] += qa * 3125 / 10000;
+
+      *dest = (t & 0xFFFF);
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errR;
+  delete [] errG;
+  delete [] errB;
+  delete [] errA;
+}
+
+/* Quantize a width x height ARGB8888 image to AI44 (4-bit alpha, 4-bit
+ * intensity, one byte per texel) using Floyd-Steinberg error diffusion.
+ *
+ * src    : source texels, one uint32 each (A 24-31, R 16-23, G 8-15, B 0-7).
+ * dst    : destination, written as one uint8 per texel
+ *          (alpha in bits 4-7, intensity in bits 0-3).
+ * width  : texels per row.
+ * height : number of rows.
+ *
+ * Values are kept as fixed point scaled by 10000, so an 8-bit quantity
+ * spans 0..2550000.
+ */
+void
+TxQuantize::ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height)
+{
+  /* NOTE: alpha dithering looks better for alpha gradients, but is prone
+   * to producing noisy speckles for constant or step level alpha. Output
+   * results should always be checked.
+   */
+  boolean ditherAlpha = 0;
+
+  /* nothing to convert; also keeps new[] from seeing a negative length */
+  if (width <= 0 || height <= 0) return;
+
+  int i, x, y;
+  int qi, qa; /* quantized incoming values, later the errors */
+  int ii, ia; /* incoming values */
+  int t;
+  /* per-column error carried from the current row to the row below */
+  int *errI = new int[width];
+  int *errA = new int[width];
+
+  uint8 *dest = (uint8 *)dst;
+
+  for (i = 0; i < width; i++) errI[i] = errA[i] = 0;
+
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114
+       * (the weights sum to 10000, which gives the fixed point scale) */
+      ii = ((*src >> 16) & 0xFF) * 2990 +
+           ((*src >>  8) & 0xFF) * 5870 +
+           ((*src      ) & 0xFF) * 1140;
+      ia = ((*src >> 24) & 0xFF) * 10000;
+
+      /* the first texel of a row has no neighbour to the left */
+      if (x == 0) qi = qa = 0;
+
+      /* quantize pixel values.
+       * qi * 0.4375 is the error from the pixel to the left (the EAST
+       * share computed in the previous iteration),
+       * errI is the error from the row above (top, top left, top right) */
+      ii += errI[x] + qi * 4375 / 10000;
+      ia += errA[x] + qa * 4375 / 10000;
+
+      /* error distribution to the SOUTH-EAST of the previous iteration.
+       * it cannot be stored in the previous iteration because errX[x]
+       * was still needed by the quantization above */
+      errI[x] = qi * 625 / 10000;
+      errA[x] = qa * 625 / 10000;
+
+      qi = ii;
+      qa = ia;
+
+      /* clamp */
+      if (qi < 0) qi = 0; else if (qi > 2550000) qi = 2550000;
+      if (qa < 0) qa = 0; else if (qa > 2550000) qa = 2550000;
+
+      /* convert to 4 bits */
+      qi = qi * 0xF / 2550000;
+      qa = qa * 0xF / 2550000;
+
+      /* this is the value to be returned */
+      if (ditherAlpha) {
+        t = (qa << 4) | qi;
+      } else {
+        /* undithered alpha: take the top 4 bits of the source alpha */
+        t = qi;
+        t |= ((*src >> 24) & 0xF0);
+      }
+
+      /* compute the errors: expand the 4-bit result back to 8 bits by
+       * nibble replication and subtract it from the wanted value */
+      qi = ((qi << 4) | qi) * 10000;
+      qa = ((qa << 4) | qa) * 10000;
+      qi = ii - qi;
+      qa = ia - qa;
+
+      /* compute the error distributions */
+      /* Floyd-Steinberg filter
+       * 7/16 (=0.4375) to the EAST
+       * 5/16 (=0.3125) to the SOUTH
+       * 1/16 (=0.0625) to the SOUTH-EAST
+       * 3/16 (=0.1875) to the SOUTH-WEST
+       *
+       *         x    7/16
+       *  3/16  5/16  1/16
+       */
+      /* SOUTH-WEST: errX[x - 1] has already been consumed for this row,
+       * so it now accumulates error for the row below. Every column but
+       * the first has a south-west neighbour. */
+      if (x > 0) {
+        errI[x - 1] += qi * 1875 / 10000;
+        errA[x - 1] += qa * 1875 / 10000;
+      }
+
+      /* SOUTH */
+      errI[x] += qi * 3125 / 10000;
+      errA[x] += qa * 3125 / 10000;
+
+      *dest = t & 0xFF;
+
+      dest++;
+      src++;
+    }
+  }
+
+  delete [] errI;
+  delete [] errA;
+}
+
+/* Convert ARGB8888 texels to AI88: the source alpha byte goes to the high
+ * byte of each 16-bit result and a grayscale intensity computed from R, G
+ * and B goes to the low byte.
+ */
+void
+TxQuantize::ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height)
+{
+  uint16 *out = (uint16 *)dst;
+
+  for (int row = 0; row < height; row++) {
+    for (int col = 0; col < width; col++, out++, src++) {
+      const uint32 texel = *src;
+      const uint32 red   = (texel >> 16) & 0xFF;
+      const uint32 green = (texel >>  8) & 0xFF;
+      const uint32 blue  = (texel      ) & 0xFF;
+#if 1
+      /* libpng style grayscale conversion, after the equation given in
+       * Poynton's ColorFAQ <http://www.inforamp.net/~poynton/>
+       * (Copyright (c) 1998-01-04 Charles Poynton):
+       *
+       *     Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
+       *
+       * approximated as
+       *
+       *     Y = 0.21268 * R + 0.7151 * G + 0.07217 * B
+       *
+       * and evaluated with integers as
+       *
+       *     Y = (6969 * R + 23434 * G + 2365 * B) / 32768
+       *
+       * The calculation is to be done in a linear colorspace.
+       */
+      const int intensity =
+        (int)((red * 6969 + green * 23434 + blue * 2365) / 32768) & 0xFF;
+#else
+      /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114,
+       * i.e. the standard NTSC gray scale conversion. */
+      const int intensity =
+        (int)((red * 299 + green * 587 + blue * 114) / 1000) & 0xFF;
+#endif
+      /* bits 24-31 of the source (alpha) land in bits 8-15 */
+      *out = (intensity | (uint16)((texel >> 16) & 0xFF00));
+    }
+  }
+}
+
+/* Convert ARGB8888 texels to 8-bit intensity: one output byte per texel,
+ * computed from R, G and B. Source alpha is ignored.
+ */
+void
+TxQuantize::ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height)
+{
+  uint8 *out = (uint8 *)dst;
+
+  for (int row = 0; row < height; row++) {
+    for (int col = 0; col < width; col++, out++, src++) {
+      const uint32 texel = *src;
+      const uint32 red   = (texel >> 16) & 0xFF;
+      const uint32 green = (texel >>  8) & 0xFF;
+      const uint32 blue  = (texel      ) & 0xFF;
+#if 1
+      /* libpng style Intensity = (6969 * R + 23434 * G + 2365 * B)/32768 */
+      *out = (int)((red * 6969 + green * 23434 + blue * 2365) / 32768) & 0xFF;
+#else
+      /* 3dfx style Intensity = R * 0.299 + G * 0.587 + B * 0.114,
+       * i.e. the standard NTSC gray scale conversion. */
+      *out = (int)((red * 299 + green * 587 + blue * 114) / 1000) & 0xFF;
+#endif
+    }
+  }
+}
+
+/* Expand an 8-bit palettized image to 16 bits per texel: each source byte
+ * indexes the 16-bit palette, and the looked-up entry is rotated right by
+ * one bit before it is stored.
+ */
+void
+TxQuantize::P8_16BPP(uint32* src, uint32* dest, int width, int height, uint32* palette)
+{
+  /* passed in palette is RGBA5551 format */
+#if 1
+  const uint8  *indices = (const uint8 *)src;
+  const uint16 *lut     = (const uint16 *)palette;
+  uint16       *out     = (uint16 *)dest;
+  const int texels = width * height;
+
+  for (int n = 0; n < texels; n++) {
+    const uint16 entry = lut[(int)indices[n]];
+    /* 16-bit rotate right by one: bit 0 moves up to bit 15 */
+    out[n] = (uint16)((entry << 15) | (entry >> 1));
+  }
+#else
+
+  /* not finished yet... */
+
+  int siz = (width * height) >> 2;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+    mov edx, dword ptr [palette];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+#endif
+}
+
+/* Convert an image between ARGB8888 and one of the narrower texture
+ * formats.
+ *
+ * src, dest     : source and destination texel buffers.
+ * width, height : image dimensions in texels.
+ * srcformat     : GR_TEXFMT_* of src.
+ * destformat    : GR_TEXFMT_* of dest.
+ * fastQuantizer : when reducing from ARGB8888, non-zero selects the plain
+ *                 converters, zero selects the error-diffusion / "slow"
+ *                 variants. Ignored when expanding to ARGB8888.
+ *
+ * Exactly one of srcformat / destformat has to be GR_TEXFMT_ARGB_8888.
+ * Returns 1 on success and 0 when the format pair is not supported.
+ *
+ * When more than one core is available and the image is tall enough, the
+ * rows are split into bands (multiples of 4 rows) that are converted on
+ * separate threads.
+ */
+boolean
+TxQuantize::quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer)
+{
+  typedef void (TxQuantize::*quantizerFunc)(uint32* src, uint32* dest, int width, int height);
+  quantizerFunc quantizer;
+  /* log2 of the size ratio between an ARGB8888 texel and the narrow one */
+  int bpp_shift = 0;
+  /* true: narrow -> ARGB8888, false: ARGB8888 -> narrow */
+  bool expanding;
+
+  if (destformat == GR_TEXFMT_ARGB_8888) {
+    expanding = true;
+    switch (srcformat) {
+    case GR_TEXFMT_ARGB_1555:
+      quantizer = &TxQuantize::ARGB1555_ARGB8888;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ARGB_4444:
+      quantizer = &TxQuantize::ARGB4444_ARGB8888;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_RGB_565:
+      quantizer = &TxQuantize::RGB565_ARGB8888;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ALPHA_8:
+      quantizer = &TxQuantize::A8_ARGB8888;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_44:
+      quantizer = &TxQuantize::AI44_ARGB8888;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_88:
+      quantizer = &TxQuantize::AI88_ARGB8888;
+      bpp_shift = 1;
+      break;
+    default:
+      return 0;
+    }
+  } else if (srcformat == GR_TEXFMT_ARGB_8888) {
+    expanding = false;
+    switch (destformat) {
+    case GR_TEXFMT_ARGB_1555:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB1555 : &TxQuantize::ARGB8888_ARGB1555_ErrD;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ARGB_4444:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_ARGB4444 : &TxQuantize::ARGB8888_ARGB4444_ErrD;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_RGB_565:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_RGB565 : &TxQuantize::ARGB8888_RGB565_ErrD;
+      bpp_shift = 1;
+      break;
+    case GR_TEXFMT_ALPHA_8:
+    case GR_TEXFMT_INTENSITY_8:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_A8 : &TxQuantize::ARGB8888_I8_Slow;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_44:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI44 : &TxQuantize::ARGB8888_AI44_ErrD;
+      bpp_shift = 2;
+      break;
+    case GR_TEXFMT_ALPHA_INTENSITY_88:
+      quantizer = fastQuantizer ? &TxQuantize::ARGB8888_AI88 : &TxQuantize::ARGB8888_AI88_Slow;
+      bpp_shift = 1;
+      break;
+    default:
+      return 0;
+    }
+  } else {
+    return 0;
+  }
+
+  /* Pick the largest thread count for which every thread gets at least
+   * one band of 4 rows. numcore is only lowered when the current value
+   * yields no rows, so blkrow always matches the numcore that is used. */
+  unsigned int numcore = _numcore;
+  if (numcore > (unsigned int)MAX_NUMCORE) numcore = MAX_NUMCORE; /* thrd[] bound */
+  unsigned int blkrow = 0;
+  while (numcore > 1) {
+    blkrow = (height >> 2) / numcore;
+    if (blkrow > 0) break;
+    numcore--;
+  }
+
+  if (blkrow > 0 && numcore > 1) {
+    boost::thread *thrd[MAX_NUMCORE];
+    unsigned int i;
+    int blkheight = blkrow << 2;
+    /* byte offsets between consecutive bands */
+    unsigned int srcStride;
+    unsigned int destStride;
+    if (expanding) {
+      srcStride  = (width * blkheight) << (2 - bpp_shift);
+      destStride = srcStride << bpp_shift;
+    } else {
+      srcStride  = (width * blkheight) << 2;
+      destStride = srcStride >> bpp_shift;
+    }
+    for (i = 0; i < numcore - 1; i++) {
+      thrd[i] = new boost::thread(boost::bind(quantizer,
+                                              this,
+                                              (uint32*)src,
+                                              (uint32*)dest,
+                                              width,
+                                              blkheight));
+      src  += srcStride;
+      dest += destStride;
+    }
+    /* the last thread also takes the rows left over by the division */
+    thrd[i] = new boost::thread(boost::bind(quantizer,
+                                            this,
+                                            (uint32*)src,
+                                            (uint32*)dest,
+                                            width,
+                                            height - blkheight * i));
+    for (i = 0; i < numcore; i++) {
+      thrd[i]->join();
+      delete thrd[i];
+    }
+  } else {
+    (*this.*quantizer)((uint32*)src, (uint32*)dest, width, height);
+  }
+
+  return 1;
+}
+
+/* Compress an ARGB8888 image to FXT1 through the _tx_compress_fxt1
+ * function pointer.
+ *
+ * src, dest            : source texels and destination buffer.
+ * srcwidth, srcheight  : source dimensions in texels.
+ * srcformat            : not used by this function.
+ * destwidth, destheight, destformat : receive the dimensions and format
+ *                        of the compressed image on success.
+ *
+ * Returns 1 when the image was compressed, 0 when no compressor is set or
+ * the image is smaller than 8x4 (the outputs are left untouched then).
+ */
+boolean
+TxQuantize::FXT1(uint8 *src, uint8 *dest,
+             int srcwidth, int srcheight, uint16 srcformat,
+             int *destwidth, int *destheight, uint16 *destformat)
+{
+  /*
+   * NOTE: src must be in ARGB8888 format, srcformat describes
+   * the closest 16bpp representation of src.
+   *
+   * NOTE: I have modified the dxtn library to use ARGB format
+   * which originally was ABGR format.
+   */
+
+  boolean bRet = 0;
+
+  if (_tx_compress_fxt1 &&
+      srcwidth >= 8 && srcheight >= 4) {
+    /* compress to fxt1
+     * width and height must be at least 8 and 4 respectively
+     */
+    int dstRowStride = ((srcwidth + 7) & ~7) << 1;
+    int srcRowStride = (srcwidth << 2);
+
+    /* Pick the largest thread count for which every thread gets at least
+     * one row of 4-texel-high blocks. numcore is only lowered when the
+     * current value yields no rows, so blkrow always matches the numcore
+     * that is used. */
+    unsigned int numcore = _numcore;
+    if (numcore > (unsigned int)MAX_NUMCORE) numcore = MAX_NUMCORE; /* thrd[] bound */
+    unsigned int blkrow = 0;
+    while (numcore > 1) {
+      blkrow = (srcheight >> 2) / numcore;
+      if (blkrow > 0) break;
+      numcore--;
+    }
+    if (blkrow > 0 && numcore > 1) {
+      boost::thread *thrd[MAX_NUMCORE];
+      unsigned int i;
+      int blkheight = blkrow << 2;
+      /* byte offsets between consecutive bands */
+      unsigned int srcStride = (srcwidth * blkheight) << 2;
+      unsigned int destStride = dstRowStride * blkrow;
+      for (i = 0; i < numcore - 1; i++) {
+        thrd[i] = new boost::thread(boost::bind(_tx_compress_fxt1,
+                                                srcwidth,
+                                                blkheight,
+                                                4,
+                                                src,
+                                                srcRowStride,
+                                                dest,
+                                                dstRowStride));
+        src  += srcStride;
+        dest += destStride;
+      }
+      /* the last thread also takes the rows left over by the division */
+      thrd[i] = new boost::thread(boost::bind(_tx_compress_fxt1,
+                                              srcwidth,
+                                              srcheight - blkheight * i,
+                                              4,
+                                              src,
+                                              srcRowStride,
+                                              dest,
+                                              dstRowStride));
+      for (i = 0; i < numcore; i++) {
+        thrd[i]->join();
+        delete thrd[i];
+      }
+    } else {
+      (*_tx_compress_fxt1)(srcwidth,      /* width */
+                           srcheight,     /* height */
+                           4,             /* comps: ARGB8888=4, RGB888=3 */
+                           src,           /* source */
+                           srcRowStride,  /* width*comps */
+                           dest,          /* destination */
+                           dstRowStride); /* 16 bytes per 8x4 texel */
+    }
+
+    /* dxtn adjusts width and height to M8 and M4 respectively by replication */
+    *destwidth  = (srcwidth  + 7) & ~7;
+    *destheight = (srcheight + 3) & ~3;
+    *destformat = GR_TEXFMT_ARGB_CMP_FXT1;
+
+    bRet = 1;
+  }
+
+  return bRet;
+}
+
+/* Compress an ARGB8888 image to DXT1 or DXT5 through the
+ * _tx_compress_dxtn function pointer.
+ *
+ * src, dest            : source texels and destination buffer.
+ * srcwidth, srcheight  : source dimensions in texels.
+ * srcformat            : closest 16bpp/8bpp description of src; selects
+ *                        DXT1 vs. DXT5 and lets some formats be skipped.
+ * destwidth, destheight, destformat : receive the dimensions and format
+ *                        of the compressed image.
+ *
+ * Returns 1 when the image was compressed. Returns 0 when no compressor
+ * is set, the image is smaller than 4x4, or srcformat is
+ * GR_TEXFMT_ALPHA_8 / GR_TEXFMT_ALPHA_INTENSITY_44.
+ */
+boolean
+TxQuantize::DXTn(uint8 *src, uint8 *dest,
+             int srcwidth, int srcheight, uint16 srcformat,
+             int *destwidth, int *destheight, uint16 *destformat)
+{
+  /*
+   * NOTE: src must be in ARGB8888 format, srcformat describes
+   * the closest 16bpp representation of src.
+   *
+   * NOTE: I have modified the dxtn library to use ARGB format
+   * which originally was ABGR format.
+   */
+
+  boolean bRet = 0;
+
+  if (_tx_compress_dxtn &&
+      srcwidth >= 4 && srcheight >= 4) {
+    /* compress to dxtn
+     * width and height must be at least 4
+     */
+
+    /* skip formats that DXTn won't help in size. */
+    if (srcformat == GR_TEXFMT_ALPHA_8 ||
+        srcformat == GR_TEXFMT_ALPHA_INTENSITY_44) {
+      ; /* shutup compiler */
+    } else {
+      /* default is DXT5: 16 bytes per 4x4 block */
+      int dstRowStride = ((srcwidth + 3) & ~3) << 2;
+      int compression = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
+
+      *destformat = GR_TEXFMT_ARGB_CMP_DXT5;
+
+#if !GLIDE64_DXTN
+      /* okay... we are going to disable DXT1 with 1bit alpha
+       * for Glide64. some textures have all 0 alpha values.
+       * see "N64 Kobe Bryant in NBA Courtside"
+       */
+      if (srcformat == GR_TEXFMT_ARGB_1555) {
+        dstRowStride >>= 1;
+        compression = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
+        *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
+      } else
+#endif
+      if (srcformat == GR_TEXFMT_RGB_565 ||
+          srcformat == GR_TEXFMT_INTENSITY_8) {
+        /* no alpha needed: DXT1, 8 bytes per 4x4 block */
+        dstRowStride >>= 1;
+        compression = GL_COMPRESSED_RGB_S3TC_DXT1_EXT;
+        *destformat = GR_TEXFMT_ARGB_CMP_DXT1;
+      }
+
+      /* Pick the largest thread count for which every thread gets at
+       * least one row of 4x4 blocks. numcore is only lowered when the
+       * current value yields no rows, so blkrow always matches the
+       * numcore that is used. */
+      unsigned int numcore = _numcore;
+      if (numcore > (unsigned int)MAX_NUMCORE) numcore = MAX_NUMCORE; /* thrd[] bound */
+      unsigned int blkrow = 0;
+      while (numcore > 1) {
+        blkrow = (srcheight >> 2) / numcore;
+        if (blkrow > 0) break;
+        numcore--;
+      }
+      if (blkrow > 0 && numcore > 1) {
+        boost::thread *thrd[MAX_NUMCORE];
+        unsigned int i;
+        int blkheight = blkrow << 2;
+        /* byte offsets between consecutive bands */
+        unsigned int srcStride = (srcwidth * blkheight) << 2;
+        unsigned int destStride = dstRowStride * blkrow;
+        for (i = 0; i < numcore - 1; i++) {
+          thrd[i] = new boost::thread(boost::bind(_tx_compress_dxtn,
+                                                  4,
+                                                  srcwidth,
+                                                  blkheight,
+                                                  src,
+                                                  compression,
+                                                  dest,
+                                                  dstRowStride));
+          src  += srcStride;
+          dest += destStride;
+        }
+        /* the last thread also takes the rows left over by the division */
+        thrd[i] = new boost::thread(boost::bind(_tx_compress_dxtn,
+                                                4,
+                                                srcwidth,
+                                                srcheight - blkheight * i,
+                                                src,
+                                                compression,
+                                                dest,
+                                                dstRowStride));
+        for (i = 0; i < numcore; i++) {
+          thrd[i]->join();
+          delete thrd[i];
+        }
+      } else {
+        (*_tx_compress_dxtn)(4,             /* comps: ARGB8888=4, RGB888=3 */
+                             srcwidth,      /* width */
+                             srcheight,     /* height */
+                             src,           /* source */
+                             compression,   /* format */
+                             dest,          /* destination */
+                             dstRowStride); /* DXT1 = 8 bytes per 4x4 texel
+                                             * others = 16 bytes per 4x4 texel */
+      }
+
+      /* dxtn adjusts width and height to M4 by replication */
+      *destwidth  = (srcwidth  + 3) & ~3;
+      *destheight = (srcheight + 3) & ~3;
+
+      bRet = 1;
+    }
+  }
+
+  return bRet;
+}
+
+/* Dispatch to the compressor selected by compressionType and forward its
+ * result. Returns 0 for NCC_COMPRESSION (not implemented) and for any
+ * other value of compressionType.
+ */
+boolean
+TxQuantize::compress(uint8 *src, uint8 *dest,
+                    int srcwidth, int srcheight, uint16 srcformat,
+                    int *destwidth, int *destheight, uint16 *destformat,
+                    int compressionType)
+{
+  if (compressionType == FXT1_COMPRESSION) {
+    return FXT1(src, dest,
+                srcwidth, srcheight, srcformat,
+                destwidth, destheight, destformat);
+  }
+
+  if (compressionType == S3TC_COMPRESSION) {
+    return DXTn(src, dest,
+                srcwidth, srcheight, srcformat,
+                destwidth, destheight, destformat);
+  }
+
+  /* TODO: narrow channel compression (NCC_COMPRESSION) */
+  return 0;
+}
+
+#if 0 /* unused */
+/* Expand 8-bit texels to 32 bits (x86 inline assembly).
+ *
+ * Each loop pass loads one dword from src (four 8-bit texels) and stores
+ * four dwords to dest, lowest source byte first. Every output dword has
+ * 0xff in its top byte and the source byte replicated into the three
+ * lower bytes. The pass count is (width * height) >> 2.
+ *
+ * NOTE(review): the counter is tested only after the first pass
+ * (dec ecx / jnz), so a siz of 0 does not skip the loop.
+ */
+void
+TxQuantize::I8_ARGB8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = (width * height) >> 2;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaaaaa
+    // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+    // texel 0: source bits 0-7 (edx keeps the whole source dword)
+    mov edx, eax;
+    and eax, 0x000000ff;
+    mov ebx, eax;        // 00000000 00000000 00000000 aaaaaaaa
+    shl ebx, 8;          // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // 00000000 00000000 aaaaaaaa aaaaaaaa
+    shl ebx, 8;         // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    // texel 1: source bits 8-15
+    mov eax, edx;
+    and eax, 0x0000ff00;
+    mov ebx, eax;        // 00000000 00000000 aaaaaaaa 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 aaaaaaaa
+    or  eax, ebx;        // 00000000 00000000 aaaaaaaa aaaaaaaa
+    shl ebx, 16;         // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    // texel 2: source bits 16-23
+    mov eax, edx;
+    and eax, 0x00ff0000;
+    mov ebx, eax;        // 00000000 aaaaaaaa 00000000 00000000
+    shr ebx, 8;          // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa 00000000
+    shr ebx, 8;         // 00000000 00000000 00000000 aaaaaaaa
+    or  eax, ebx;        // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    // texel 3: source bits 24-31
+    mov eax, edx;
+    and eax, 0xff000000;
+    mov ebx, eax;        // aaaaaaaa 00000000 00000000 00000000
+    shr ebx, 8;          // 00000000 aaaaaaaa 00000000 00000000
+    or  eax, ebx;        // aaaaaaaa aaaaaaaa 00000000 00000000
+    shr ebx, 8;         // 00000000 00000000 aaaaaaaa 00000000
+    or  eax, ebx;        // aaaaaaaa aaaaaaaa aaaaaaaa 00000000
+    shr eax, 8;         // 00000000 aaaaaaaa aaaaaaaa aaaaaaaa
+    or  eax, 0xff000000; // 11111111 aaaaaaaa aaaaaaaa aaaaaaaa
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+
+/* Forwards to ARGB8888_A8 with the same arguments. */
+void
+TxQuantize::ARGB8888_I8(uint32* src, uint32* dest, int width, int height)
+{
+  ARGB8888_A8(src, dest, width, height);
+}
+
+/* Expand ARGB1555 texels to ABGR8888 (x86 inline assembly).
+ *
+ * Each loop pass loads one dword from src (two 16-bit texels) and stores
+ * two dwords to dest, low half first. The 1-bit alpha becomes 0x00 or
+ * 0xff; each 5-bit colour channel is widened to 8 bits by appending its
+ * top 3 bits. Output layout is A 24-31, B 16-23, G 8-15, R 0-7.
+ * The pass count is (width * height) >> 1.
+ *
+ * NOTE(review): the counter is tested only after the first pass
+ * (dec ecx / jnz), so a siz of 0 does not skip the loop.
+ */
+void
+TxQuantize::ARGB1555_ABGR8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // arrr rrgg gggb bbbb
+    // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+    // first texel: low 16 bits of the source dword
+    mov edx, eax;         // edx = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+    and ebx, 0x00000000;  // ebx = 0, the result is assembled in ebx
+    and eax, 0x00008000;  // eax = 00000000 00000000 a0000000 00000000
+    jz  transparent1;
+    or  ebx, 0xff000000;  // ebx = aaaaaaaa 00000000 00000000 00000000
+
+  transparent1:
+    mov eax, edx;         // eax = arrrrrgg gggbbbbb arrrrrgg gggbbbbb
+    and edx, 0x0000001f;  // edx = 00000000 00000000 00000000 000bbbbb
+    shl edx, 14;          // edx = 00000000 00000bbb bb000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa 00000bbb bb000000 00000000
+    shl edx, 5;           // edx = 00000000 bbbbb000 00000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
+    and ebx, 0xffff0000;  // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
+    mov edx, eax;
+    and edx, 0x000003e0;  // edx = 00000000 00000000 000000gg ggg00000
+    shl edx, 1;           // edx = 00000000 00000000 00000ggg gg000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
+    shl edx, 5;           // edx = 00000000 00000000 ggggg000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
+    and ebx, 0xffffff00;  // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
+    mov edx, eax;
+    and edx, 0x00007c00;  // edx = 00000000 00000000 0rrrrr00 00000000
+    shr edx, 7;           // edx = 00000000 00000000 00000000 rrrrr000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
+    shr edx, 5;           // edx = 00000000 00000000 00000000 00000rrr
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], ebx;
+    add edi, 4;
+
+    // second texel: high 16 bits of the source dword
+    shr eax, 16;          // eax = 00000000 00000000 arrrrrgg gggbbbbb
+    mov edx, eax;         // edx = 00000000 00000000 arrrrrgg gggbbbbb
+    and ebx, 0x00000000;  // ebx = 0, the result is assembled in ebx
+    and eax, 0x00008000;  // eax = 00000000 00000000 a0000000 00000000
+    jz  transparent2;
+    or  ebx, 0xff000000;  // ebx = aaaaaaaa 00000000 00000000 00000000
+
+  transparent2:
+    mov eax, edx;         // eax = 00000000 00000000 arrrrrgg gggbbbbb
+    and edx, 0x0000001f;  // edx = 00000000 00000000 00000000 000bbbbb
+    shl edx, 14;          // edx = 00000000 00000bbb bb000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa 00000bbb bb000000 00000000
+    shl edx, 5;           // edx = 00000000 bbbbb000 00000000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb bb000000 00000000
+    and ebx, 0xffff0000;  // ebx = aaaaaaaa bbbbbbbb 00000000 00000000
+    mov edx, eax;
+    and edx, 0x000003e0;  // edx = 00000000 00000000 000000gg ggg00000
+    shl edx, 1;           // edx = 00000000 00000000 00000ggg gg000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb 00000ggg gg000000
+    shl edx, 5;           // edx = 00000000 00000000 ggggg000 00000000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg gg000000
+    and ebx, 0xffffff00;  // ebx = aaaaaaaa bbbbbbbb gggggggg 00000000
+    mov edx, eax;
+    and edx, 0x00007c00;  // edx = 00000000 00000000 0rrrrr00 00000000
+    shr edx, 7;           // edx = 00000000 00000000 00000000 rrrrr000
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrr000
+    shr edx, 5;           // edx = 00000000 00000000 00000000 00000rrr
+    or  ebx, edx;         // ebx = aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], ebx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+
+/* Expand ARGB4444 texels to ABGR8888 (x86 inline assembly).
+ *
+ * Each loop pass loads one dword from src (two 16-bit texels) and stores
+ * two dwords to dest, low half first. Every 4-bit channel is widened to
+ * 8 bits by nibble replication, and red and blue trade places. Output
+ * layout is A 24-31, B 16-23, G 8-15, R 0-7.
+ * The pass count is (width * height) >> 1.
+ *
+ * NOTE(review): the counter is tested only after the first pass
+ * (dec ecx / jnz), so a siz of 0 does not skip the loop.
+ */
+void
+TxQuantize::ARGB4444_ABGR8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = (width * height) >> 1;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaa rrrr gggg bbbb
+    // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+    // first texel: low 16 bits (edx keeps the whole source dword)
+    mov edx, eax;
+    and eax, 0x0000ffff;
+    mov ebx, eax;        // 00000000 00000000 aaaarrrr ggggbbbb
+    and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    shl ebx, 12;         // 0000aaaa 00000000 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 00000000 aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
+    shl ebx, 16;         // 00000000 0000bbbb 00000000 00000000
+    or  eax, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
+    mov ebx, eax;
+    and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 0000rrrr
+    and eax, 0xfffffff0;
+    or  eax, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
+    mov ebx, eax;
+    and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
+    shl ebx, 4;          // 00000000 00000000 0000gggg 00000000
+    and eax, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
+    or  eax, ebx;        // 0000aaaa 0000bbbb 0000gggg 0000rrrr
+    mov ebx, eax;
+    shl ebx, 4;          // aaaa0000 bbbb0000 gggg0000 rrrr0000
+    or  eax, ebx;        // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], eax;
+
+    add edi, 4;
+
+    // second texel: high 16 bits, same steps carried out in edx
+    shr edx, 16;
+    mov ebx, edx;        // 00000000 00000000 aaaarrrr ggggbbbb
+    and ebx, 0x0000f000; // 00000000 00000000 aaaa0000 00000000
+    shl ebx, 12;         // 0000aaaa 00000000 00000000 00000000
+    or  edx, ebx;        // 0000aaaa 00000000 aaaarrrr ggggbbbb
+    mov ebx, edx;
+    and ebx, 0x0000000f; // 00000000 00000000 00000000 0000bbbb
+    shl ebx, 16;         // 00000000 0000bbbb 00000000 00000000
+    or  edx, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggbbbb
+    mov ebx, edx;
+    and ebx, 0x00000f00; // 00000000 00000000 0000rrrr 00000000
+    shr ebx, 8;          // 00000000 00000000 00000000 0000rrrr
+    and edx, 0xfffffff0;
+    or  edx, ebx;        // 0000aaaa 0000bbbb aaaarrrr ggggrrrr
+    mov ebx, edx;
+    and ebx, 0x000000f0; // 00000000 00000000 00000000 gggg0000
+    shl ebx, 4;          // 00000000 00000000 0000gggg 00000000
+    and edx, 0x0f0f000f; // 0000aaaa 0000bbbb 00000000 0000rrrr
+    or  edx, ebx;        // 0000aaaa 0000bbbb 0000gggg 0000rrrr
+    mov ebx, edx;
+    shl ebx, 4;          // aaaa0000 bbbb0000 gggg0000 rrrr0000
+    or  edx, ebx;        // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+
+    mov dword ptr [edi], edx;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+
+/* Swap the red and blue channels of 32-bit texels (x86 inline assembly).
+ *
+ * Each loop pass loads one dword from src and stores one dword to dest:
+ * bits 24-31 are kept in place, and bits 16-23 and 0-7 trade places.
+ * The pass count is width * height.
+ *
+ * NOTE(review): the counter is tested only after the first pass
+ * (dec ecx / jnz), so a siz of 0 does not skip the loop.
+ */
+void
+TxQuantize::ARGB8888_ABGR8888(uint32* src, uint32* dest, int width, int height)
+{
+  int siz = width * height;
+
+  __asm {
+    push ebx;
+    push esi;
+    push edi;
+
+    mov esi, dword ptr [src];
+    mov edi, dword ptr [dest];
+    mov ecx, dword ptr [siz];
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    // aaaaaaaa bbbbbbbb gggggggg rrrrrrrr
+    mov edx, eax;
+    bswap edx;            // byte order reversed: the old low byte is now on top
+    shr edx, 8;           // drop the old top byte; lower three bytes are now reversed
+    and eax, 0xff000000;  // keep only the original top byte
+
+    or eax, edx;
+
+    mov dword ptr [edi], eax;
+    add edi, 4;
+
+    dec ecx;
+    jnz tc1_loop;
+
+    pop edi;
+    pop esi;
+    pop ebx;
+  }
+}
+#endif
diff --git a/GLideNHQ/TxQuantize.h b/GLideNHQ/TxQuantize.h
new file mode 100644
index 00000000..d3c6ae6d
--- /dev/null
+++ b/GLideNHQ/TxQuantize.h
@@ -0,0 +1,99 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXQUANTIZE_H__
+#define __TXQUANTIZE_H__
+
+/* Glide64 DXTn workaround
+ * (0:disable, 1:enable) */
+#define GLIDE64_DXTN 1
+
+#include "TxInternal.h"
+#include "TxUtil.h"
+
+class TxQuantize /* texture pixel-format conversion (quantize) and compression (compress) front end */
+{
+private:
+  TxUtil *_txUtil; /* utility helper; how it is created and owned is not visible in this header */
+  int _numcore; /* presumably a CPU core count -- confirm in TxQuantize.cpp */
+
+  fxtCompressTexFuncExt _tx_compress_fxt1; /* FXT1 compressor entry point */
+  dxtCompressTexFuncExt _tx_compress_dxtn; /* DXTn compressor entry point */
+
+  /* fast optimized... well, sort of. Converters are named SRCFORMAT_DSTFORMAT and share one signature. */
+  void ARGB1555_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void ARGB4444_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void RGB565_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void A8_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void AI44_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  void AI88_ARGB8888(uint32* src, uint32* dst, int width, int height);
+  /* the reverse direction: ARGB8888 down to the narrower formats */
+  void ARGB8888_ARGB1555(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ARGB4444(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_RGB565(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_A8(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI44(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI88(uint32* src, uint32* dst, int width, int height);
+
+  /* quality (presumably the variants used when quantize() is called with fastQuantizer == 0 -- confirm in TxQuantize.cpp) */
+  void ARGB8888_RGB565_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ARGB1555_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ARGB4444_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI44_ErrD(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_AI88_Slow(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_I8_Slow(uint32* src, uint32* dst, int width, int height);
+
+  /* compressors: take the source size/format and report the resulting size/format through the dest* pointers */
+  boolean FXT1(uint8 *src, uint8 *dest,
+               int srcwidth, int srcheight, uint16 srcformat,
+               int *destwidth, int *destheight, uint16 *destformat);
+  boolean DXTn(uint8 *src, uint8 *dest,
+               int srcwidth, int srcheight, uint16 srcformat,
+               int *destwidth, int *destheight, uint16 *destformat);
+
+public:
+  TxQuantize();
+  ~TxQuantize();
+
+  /* others */
+  void P8_16BPP(uint32* src, uint32* dst, int width, int height, uint32* palette); /* the only converter that takes a palette */
+
+  boolean quantize(uint8* src, uint8* dest, int width, int height, uint16 srcformat, uint16 destformat, boolean fastQuantizer = 1); /* fastQuantizer defaults to on */
+
+  boolean compress(uint8 *src, uint8 *dest,
+                   int srcwidth, int srcheight, uint16 srcformat,
+                   int *destwidth, int *destheight, uint16 *destformat,
+                   int compressionType); /* compressionType presumably selects FXT1 vs DXTn -- confirm in TxQuantize.cpp */
+
+
+#if 0 /* unused */
+  void ARGB8888_I8(uint32* src, uint32* dst, int width, int height);
+  void I8_ARGB8888(uint32* src, uint32* dst, int width, int height);
+
+  void ARGB1555_ABGR8888(uint32* src, uint32* dst, int width, int height);
+  void ARGB4444_ABGR8888(uint32* src, uint32* dst, int width, int height);
+  void ARGB8888_ABGR8888(uint32* src, uint32* dst, int width, int height);
+#endif
+};
+
+#endif /* __TXQUANTIZE_H__ */
diff --git a/GLideNHQ/TxReSample.cpp b/GLideNHQ/TxReSample.cpp
new file mode 100644
index 00000000..138428b7
--- /dev/null
+++ b/GLideNHQ/TxReSample.cpp
@@ -0,0 +1,417 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "TxReSample.h"
+#include "TxDbg.h"
+#include <stdlib.h>
+#include <memory.h>
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+int
+TxReSample::nextPow2(int num)
+{
+  /* For positive num, yields the smallest power of two that is >= num:
+   * smear the highest set bit of (num - 1) into every lower bit position,
+   * then add one. */
+  int bits = num - 1;
+
+  /* shifts of 1, 2, 4, 8 and 16 cover a 32-bit int; a 64-bit value would
+   * need one more step with a shift of 32 */
+  for (int shift = 1; shift <= 16; shift <<= 1)
+    bits |= (bits >> shift);
+  return bits + 1;
+}
+
+boolean
+TxReSample::nextPow2(uint8** image, int* width, int* height, int bpp, boolean use_3dfx = 0)
+{
+  /* Resizes *image in place to power-of-two dimensions: missing columns/rows
+   * are filled by replicating the last pixel/row, excess ones are cropped.
+   * NOTE: bpp must be one of the following: 8, 16, 24, 32 bits per pixel.
+   * Returns 1 on success or if nothing had to change, 0 on bad input or OOM. */
+  if (!image || !width || !height || !*image || *width <= 0 || *height <= 0 || bpp <= 0)
+    return 0;
+
+  int row_bytes = ((*width * bpp) >> 3);
+  int o_row_bytes = row_bytes;
+  int o_width = *width;
+  int n_width = *width;
+  int o_height = *height;
+  int n_height = *height;
+
+  /* HACKALERT: I have explicitly subtracted (n) from width/height to
+   * adjust textures that have (n) pixel larger width/height than
+   * power of 2 size. This is a dirty hack for textures that have
+   * munged aspect ratio by (n) pixel to the original.
+   */
+  if      (n_width  > 64) n_width  -= 4;
+  else if (n_width  > 16) n_width  -= 2;
+  else if (n_width  >  4) n_width  -= 1;
+
+  if      (n_height > 64) n_height -= 4;
+  else if (n_height > 16) n_height -= 2;
+  else if (n_height >  4) n_height -= 1;
+
+  n_width = nextPow2(n_width);
+  n_height = nextPow2(n_height);
+  row_bytes = (n_width * bpp) >> 3;
+
+  /* 3dfx Glide3 format, W:H aspect ratio range (8:1 - 1:8) */
+  if (use_3dfx) {
+    if (n_width > n_height) {
+      if (n_width > (n_height << 3))
+        n_height = n_width >> 3;
+    } else {
+      if (n_height > (n_width << 3)) {
+        n_width = n_height >> 3;
+        row_bytes = (n_width * bpp) >> 3;
+      }
+    }
+    DBG_INFO(80, L"using 3dfx W:H aspect ratio range (8:1 - 1:8).\n");
+  }
+
+  /* do we really need to do this ? */
+  if (o_width == n_width && o_height == n_height)
+    return 1; /* nope */
+
+  DBG_INFO(80, L"expand image to next power of 2 dimensions. %d x %d -> %d x %d\n",
+           o_width, o_height, n_width, n_height);
+  /* the HACK above can round a dimension down; in that case crop instead of pad */
+  if (o_width > n_width)
+    o_width = n_width;
+  if (o_height > n_height)
+    o_height = n_height;
+
+  /* allocate the destination; row_bytes is the length of one new row */
+  uint8 *pow2image = (uint8*)malloc(row_bytes * n_height);
+
+  /* copy the source rows, padding each one to the new row length */
+  if (pow2image) {
+    int i, j;
+    uint8 *tmpimage = *image, *tmppow2image = pow2image;
+
+    for (i = 0; i < o_height; i++) {
+      /* copy row */
+      memcpy(tmppow2image, tmpimage, ((o_width * bpp) >> 3));
+
+      /* expand to pow2 size by replication: each new byte repeats the byte one pixel to its left */
+      for(j = ((o_width * bpp) >> 3); j < row_bytes; j++)
+        tmppow2image[j] = tmppow2image[j - (bpp >> 3)];
+
+      tmppow2image += row_bytes;
+      tmpimage += o_row_bytes;
+    }
+    /* expand to pow2 size by replication: every extra row repeats the row above it */
+    for (i = o_height; i < n_height; i++)
+      memcpy(&pow2image[row_bytes * i], &pow2image[row_bytes * (i - 1)], row_bytes);
+
+    free(*image);
+    *image = pow2image;
+    *height = n_height;
+    *width = n_width;
+
+    return 1;
+  }
+
+  return 0;
+}
+
+/* Ken Turkowski
+ * Filters for Common Resampling Tasks
+ * Apple Computer 1990
+ */
+double
+TxReSample::tent(double x)
+{
+  /* triangle kernel: 1 - |x| inside (-1, 1), zero elsewhere */
+  const double dist = (x < 0.0) ? -x : x;
+  return (dist < 1.0) ? (1.0 - dist) : 0.0;
+}
+
+double
+TxReSample::gaussian(double x)
+{
+  /* 2^(-2 x^2), cut off to zero once |x| reaches 2 */
+  const double dist = (x < 0) ? -x : x;
+  return (dist < 2.0) ? pow(2.0, -2.0 * dist * dist) : 0.0;
+}
+
+double
+TxReSample::sinc(double x)
+{
+  /* normalized sinc: sin(pi x) / (pi x), defined as 1 at x == 0 */
+  const double phase = x * M_PI;
+  return (x == 0) ? 1.0 : (sin(phase) / phase);
+}
+
+double
+TxReSample::lanczos3(double x)
+{
+  /* sinc windowed by a three-times-wider sinc; zero once |x| reaches 3 */
+  const double dist = (x < 0) ? -x : x;
+  return (dist < 3.0) ? (sinc(dist) * sinc(dist / 3.0)) : 0.0;
+}
+
+/* Don P. Mitchell and Arun N. Netravali
+ * Reconstruction Filters in Computer Graphics
+ * SIGGRAPH '88
+ * Proceedings of the 15th annual conference on Computer 
+ * graphics and interactive techniques, pp221-228, 1988
+ */
+double
+TxReSample::mitchell(double x)
+{
+  /* piecewise cubic with B = C = 1/3; zero once |x| reaches 2 */
+  const double B = 1.0 / 3.0;
+  const double C = 1.0 / 3.0;
+  const double t = (x < 0) ? -x : x;
+  double poly;
+
+  if (!(t < 2.0)) return 0.0;
+  if (t < 1.0)
+    poly = (((12.0 - 9.0 * B - 6.0 * C) * (t * t * t))
+            + ((-18.0 + 12.0 * B + 6.0 * C) * (t * t))
+            + (6.0 - 2.0 * B));
+  else
+    poly = (((-1.0 * B - 6.0 * C) * (t * t * t))
+            + ((6.0 * B + 30.0 * C) * (t * t))
+            + ((-12.0 * B - 48.0 * C) * t)
+            + (8.0 * B + 24.0 * C));
+  return (poly / 6.0);
+}
+
+/* J. F. Kaiser and W. A. Reed
+ * Data smoothing using low-pass digital filters
+ * Rev. Sci. instrum. 48 (11), pp1447-1457, 1977
+ */
+double
+TxReSample::besselI0(double x)
+{
+  /* I0(x), the zero-order modified Bessel function of the first kind,
+   * summed as 1 + sum over k >= 1 of ((x/2)^k / k!)^2 until a term no
+   * longer matters relative to the running total */
+  const double rel_tol = 1E-16;
+  const double half_x = 0.5 * x;
+  double total = 1.0;
+  double ratio = 1.0;   /* (x/2)^k / k!, built up incrementally */
+  double term = 1.0;
+
+  for (int k = 1; term > total * rel_tol; k++) {
+    ratio *= (half_x / k);
+    term = ratio * ratio;
+    total = total + term;
+  }
+  return total;
+}
+
+double
+TxReSample::kaiser(double x)
+{
+  const double alpha = 4.0, half_window = 5.0; /* Kaiser-windowed sinc: window shape, and half width (the same 5.0 minify() uses) */
+  if (x < -half_window || x > half_window) return 0.0; /* outside the window, like the other kernels; sqrt() below would otherwise yield NaN */
+  const double ratio = x / half_window;
+  return sinc(x) * besselI0(alpha * sqrt(1 - ratio * ratio)) / besselI0(alpha);
+}
+
+boolean
+TxReSample::minify(uint8 **src, int *width, int *height, int ratio)
+{
+  /* NOTE: src must be ARGB8888, ratio is the inverse representation
+   * (ratio 2 halves each dimension). Returns 0 if *src is null, ratio < 2 or an allocation fails. */
+#if 0
+  if (!*src || ratio < 2) return 0;
+
+  /* Box filtering (this branch is compiled out).
+   * It would be nice to do Kaiser filtering.
+   * N64 uses narrow strip textures which makes it hard to filter effectively.
+   */
+
+  int x, y, x2, y2, offset, numtexel;
+  uint32 A, R, G, B, texel;
+
+  int tmpwidth = *width / ratio;
+  int tmpheight = *height / ratio;
+
+  uint8 *tmptex = (uint8*)malloc((tmpwidth * tmpheight) << 2);
+
+  if (tmptex) {
+    numtexel = ratio * ratio;
+    for (y = 0; y < tmpheight; y++) {
+      offset = ratio * y * *width;
+      for (x = 0; x < tmpwidth; x++) {
+        A = R = G = B = 0;
+        for (y2 = 0; y2 < ratio; y2++) {
+          for (x2 = 0; x2 < ratio; x2++) {
+            texel = ((uint32*)*src)[offset + *width * y2 + x2];
+            A += (texel >> 24);
+            R += ((texel >> 16) & 0x000000ff);
+            G += ((texel >> 8) & 0x000000ff);
+            B += (texel & 0x000000ff);
+          }
+        }
+        A = (A + ratio) / numtexel;
+        R = (R + ratio) / numtexel;
+        G = (G + ratio) / numtexel;
+        B = (B + ratio) / numtexel;
+        ((uint32*)tmptex)[y * tmpwidth + x] = ((A << 24) | (R << 16) | (G << 8) | B);
+        offset += ratio;
+      }
+    }
+    free(*src);
+    *src = tmptex;
+    *width = tmpwidth;
+    *height = tmpheight;
+
+    DBG_INFO(80, L"minification ratio:%d -> %d x %d\n", ratio, *width, *height);
+
+    return 1;
+  }
+
+  DBG_INFO(80, L"Error: failed minification!\n");
+
+  return 0;
+
+#else
+
+  if (!*src || ratio < 2) return 0;
+
+  /* Image Resampling: separable filtering with the kernel selected in the weight loop below */
+
+  /* half width of filter window.
+   * NOTE: must be 1.0 or larger. 
+   *
+   * kaiser-bessel 5, lanczos3 3, mitchell 2, gaussian 1.5, tent 1
+   */
+  double half_window = 5.0; /* kaiser() carries its own half_window of 5.0; keep the two in step */
+
+  int x, y, x2, y2, z;
+  double A, R, G, B;
+  uint32 texel;
+
+  int tmpwidth = *width / ratio;   /* integer division: leftover columns are dropped */
+  int tmpheight = *height / ratio; /* NOTE(review): either is 0 when ratio exceeds the dimension -- confirm callers never request that */
+
+  /* resampled destination */
+  uint8 *tmptex = (uint8*)malloc((tmpwidth * tmpheight) << 2);
+  if (!tmptex) return 0;
+
+  /* work buffer. single row */
+  uint8 *workbuf = (uint8*)malloc(*width << 2);
+  if (!workbuf) {
+    free(tmptex);
+    return 0;
+  }
+
+  /* prepare filter lookup table. only half width required for symmetric filters. */
+  double *weight = (double*)malloc((int)((half_window * ratio) * sizeof(double)));
+  if (!weight) {
+    free(tmptex);
+    free(workbuf);
+    return 0;
+  }
+  for (x = 0; x < half_window * ratio; x++) { /* entry x is the kernel at x / ratio, scaled by 1 / ratio */
+    //weight[x] = tent((double)x / ratio) / ratio;
+    //weight[x] = gaussian((double)x / ratio) / ratio;
+    //weight[x] = lanczos3((double)x / ratio) / ratio;
+    //weight[x] = mitchell((double)x / ratio) / ratio;
+    weight[x] = kaiser((double)x / ratio) / ratio;
+  }
+
+  /* linear convolution: per output row, filter vertically into workbuf, then horizontally into tmptex */
+  for (y = 0; y < tmpheight; y++) {
+    for (x = 0; x < *width; x++) { /* vertical pass, one source column at a time */
+      texel = ((uint32*)*src)[y * ratio * *width + x]; /* centre tap: source row y * ratio */
+      A = (double)(texel >> 24) * weight[0];
+      R = (double)((texel >> 16) & 0xff) * weight[0];
+      G = (double)((texel >>  8) & 0xff) * weight[0];
+      B = (double)((texel      ) & 0xff) * weight[0];
+      for (y2 = 1; y2 < half_window * ratio; y2++) { /* taps y2 rows below and above share weight[y2] */
+        z = y * ratio + y2;
+        if (z >= *height) z = *height - 1; /* clamp to the last row */
+        texel = ((uint32*)*src)[z * *width + x];
+        A += (double)(texel >> 24) * weight[y2];
+        R += (double)((texel >> 16) & 0xff) * weight[y2];
+        G += (double)((texel >>  8) & 0xff) * weight[y2];
+        B += (double)((texel      ) & 0xff) * weight[y2];
+        z = y * ratio - y2;
+        if (z < 0) z = 0; /* clamp to the first row */
+        texel = ((uint32*)*src)[z * *width + x];
+        A += (double)(texel >> 24) * weight[y2];
+        R += (double)((texel >> 16) & 0xff) * weight[y2];
+        G += (double)((texel >>  8) & 0xff) * weight[y2];
+        B += (double)((texel      ) & 0xff) * weight[y2];
+      }
+      if (A < 0) A = 0; else if (A > 255) A = 255; /* clamp each channel to 0..255 before repacking */
+      if (R < 0) R = 0; else if (R > 255) R = 255;
+      if (G < 0) G = 0; else if (G > 255) G = 255;
+      if (B < 0) B = 0; else if (B > 255) B = 255;
+      ((uint32*)workbuf)[x] = (((uint32)A << 24) | ((uint32)R << 16) | ((uint32)G << 8) | (uint32)B);
+    }
+    for (x = 0; x < tmpwidth; x++) { /* horizontal pass over the row just written to workbuf */
+      texel = ((uint32*)workbuf)[x * ratio]; /* centre tap: column x * ratio */
+      A = (double)(texel >> 24) * weight[0];
+      R = (double)((texel >> 16) & 0xff) * weight[0];
+      G = (double)((texel >>  8) & 0xff) * weight[0];
+      B = (double)((texel      ) & 0xff) * weight[0];
+      for (x2 = 1; x2 < half_window * ratio; x2++) {
+        z = x * ratio + x2;
+        if (z >= *width) z = *width - 1; /* clamp to the last column */
+        texel = ((uint32*)workbuf)[z];
+        A += (double)(texel >> 24) * weight[x2];
+        R += (double)((texel >> 16) & 0xff) * weight[x2];
+        G += (double)((texel >>  8) & 0xff) * weight[x2];
+        B += (double)((texel      ) & 0xff) * weight[x2];
+        z = x * ratio - x2;
+        if (z < 0) z = 0; /* clamp to the first column */
+        texel = ((uint32*)workbuf)[z];
+        A += (double)(texel >> 24) * weight[x2];
+        R += (double)((texel >> 16) & 0xff) * weight[x2];
+        G += (double)((texel >>  8) & 0xff) * weight[x2];
+        B += (double)((texel      ) & 0xff) * weight[x2];
+      }
+      if (A < 0) A = 0; else if (A > 255) A = 255;
+      if (R < 0) R = 0; else if (R > 255) R = 255;
+      if (G < 0) G = 0; else if (G > 255) G = 255;
+      if (B < 0) B = 0; else if (B > 255) B = 255;
+      ((uint32*)tmptex)[y * tmpwidth + x] = (((uint32)A << 24) | ((uint32)R << 16) | ((uint32)G << 8) | (uint32)B);
+    }
+  }
+
+  free(*src); /* the caller's buffer is replaced by the resampled one */
+  *src = tmptex;
+  free(weight);
+  free(workbuf);
+  *width = tmpwidth;
+  *height = tmpheight;
+
+  DBG_INFO(80, L"minification ratio:%d -> %d x %d\n", ratio, *width, *height);
+
+  return 1;
+#endif
+}
diff --git a/GLideNHQ/TxReSample.h b/GLideNHQ/TxReSample.h
new file mode 100644
index 00000000..805647d2
--- /dev/null
+++ b/GLideNHQ/TxReSample.h
@@ -0,0 +1,45 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXRESAMPLE_H__
+#define __TXRESAMPLE_H__
+
+#include "TxInternal.h"
+
+class TxReSample /* image resizing helpers: power-of-two expansion and filtered minification */
+{
+private:
+  double tent(double x);      /* triangle kernel, zero once |x| reaches 1 */
+  double gaussian(double x);  /* 2^(-2 x^2), zero once |x| reaches 2 */
+  double sinc(double x);      /* sin(pi x) / (pi x), 1 at x == 0 */
+  double lanczos3(double x);  /* sinc(x) * sinc(x / 3), zero once |x| reaches 3 */
+  double mitchell(double x);  /* Mitchell-Netravali cubic with B = C = 1/3, zero once |x| reaches 2 */
+  double besselI0(double x);  /* zero-order modified Bessel function of the first kind (series sum) */
+  double kaiser(double x);    /* sinc under a Kaiser window; the kernel minify() currently uses */
+public:
+  boolean minify(uint8 **src, int *width, int *height, int ratio); /* shrinks an ARGB8888 image by 1/ratio, replacing *src */
+  boolean nextPow2(uint8** image, int* width, int* height, int bpp, boolean use_3dfx); /* resizes *image to power-of-two dimensions */
+  int nextPow2(int num); /* for positive num: smallest power of two >= num */
+};
+
+#endif /* __TXRESAMPLE_H__ */
diff --git a/GLideNHQ/TxTexCache.cpp b/GLideNHQ/TxTexCache.cpp
new file mode 100644
index 00000000..c063c240
--- /dev/null
+++ b/GLideNHQ/TxTexCache.cpp
@@ -0,0 +1,79 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __MSC__
+#pragma warning(disable: 4786)
+#endif
+
+/* dump cache to disk (0:disable, 1:enable) */
+#define DUMP_CACHE 1
+
+#include "TxTexCache.h"
+#include "TxDbg.h"
+#include <zlib.h>
+#include <string>
+#include <boost/filesystem.hpp>
+
+TxTexCache::~TxTexCache()
+{
+#if DUMP_CACHE
+  /* on destruction, save the cache to <path>/cache/<ident>_MEMORYCACHE.dat if dumping is enabled */
+  if (!(_options & DUMP_TEXCACHE))
+    return;
+
+  const int saveConfig = _options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESS_TEX|COMPRESSION_MASK|FORCE16BPP_TEX|GZ_TEXCACHE);
+  const std::wstring dumpName = _ident + L"_MEMORYCACHE.dat";
+  boost::filesystem::wpath dumpDir(_path);
+  dumpDir /= boost::filesystem::wpath(L"cache");
+  TxCache::save(dumpDir.wstring().c_str(), dumpName.c_str(), saveConfig);
+#endif
+}
+
+TxTexCache::TxTexCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+                       dispInfoFuncExt callback)
+  : TxCache((options & ~GZ_HIRESTEXCACHE), cachesize, path, ident, callback)
+{
+  /* dumping needs a location, an identifier and a non-zero cache size */
+  const bool dumpable = !_path.empty() && !_ident.empty() && (_cacheSize != 0);
+  if (!dumpable)
+    _options &= ~DUMP_TEXCACHE;
+
+#if DUMP_CACHE
+  /* look for <path>/cache/<ident>_MEMORYCACHE.dat, the file the destructor writes, and load it */
+  if (!(_options & DUMP_TEXCACHE))
+    return;
+  const int loadConfig = _options & (FILTER_MASK|ENHANCEMENT_MASK|COMPRESS_TEX|COMPRESSION_MASK|FORCE16BPP_TEX|GZ_TEXCACHE);
+  const std::wstring dumpName = _ident + L"_MEMORYCACHE.dat";
+  boost::filesystem::wpath dumpDir(_path);
+  dumpDir /= boost::filesystem::wpath(L"cache");
+  TxCache::load(dumpDir.wstring().c_str(), dumpName.c_str(), loadConfig);
+#endif
+}
+
+boolean
+TxTexCache::add(uint64 checksum, GHQTexInfo *info)
+{
+  /* a cache without capacity stores nothing; otherwise defer to the base class */
+  if (_cacheSize > 0) return TxCache::add(checksum, info);
+  return 0;
+}
diff --git a/GLideNHQ/TxTexCache.h b/GLideNHQ/TxTexCache.h
new file mode 100644
index 00000000..b8020953
--- /dev/null
+++ b/GLideNHQ/TxTexCache.h
@@ -0,0 +1,39 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXTEXCACHE_H__
+#define __TXTEXCACHE_H__
+
+#include "TxCache.h"
+
+class TxTexCache : public TxCache /* TxCache that can be saved to and restored from a file on disk */
+{
+public:
+  ~TxTexCache(); /* saves to <path>/cache/<ident>_MEMORYCACHE.dat when DUMP_TEXCACHE is set */
+  TxTexCache(int options, int cachesize, const wchar_t *path, const wchar_t *ident,
+             dispInfoFuncExt callback); /* clears GZ_HIRESTEXCACHE from options; loads the saved file when DUMP_TEXCACHE is set */
+  boolean add(uint64 checksum, /* checksum hi:palette low:texture */
+              GHQTexInfo *info); /* returns 0 without storing when the cache size is <= 0 */
+};
+
+#endif /* __TXTEXCACHE_H__ */
diff --git a/GLideNHQ/TxUtil.cpp b/GLideNHQ/TxUtil.cpp
new file mode 100644
index 00000000..4972b8e6
--- /dev/null
+++ b/GLideNHQ/TxUtil.cpp
@@ -0,0 +1,1006 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "TxUtil.h"
+#include "TxDbg.h"
+#include <zlib.h>
+#include <malloc.h>
+
+/*
+ * External libraries
+ ******************************************************************************/
+TxLoadLib::TxLoadLib()
+{
+#ifdef DXTN_DLL
+  /* dynamic build: open the "dxtn" library if it is not open yet, then
+   * look up whichever encoder entry points are still unset */
+  if (!_dxtnlib)
+    _dxtnlib = LoadLibrary("dxtn");
+  if (!_dxtnlib)
+    return;
+  if (!_tx_compress_dxtn)
+    _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");
+  if (!_tx_compress_fxt1)
+    _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
+#else
+  /* static build: bind straight to the linked-in encoders */
+  _tx_compress_fxt1 = fxt1_encode;
+  _tx_compress_dxtn = tx_compress_dxtn;
+#endif
+}
+
+TxLoadLib::~TxLoadLib()
+{
+#ifdef DXTN_DLL
+  /* release the "dxtn" library if it was opened */
+  if (_dxtnlib != 0) {
+    FreeLibrary(_dxtnlib);
+  }
+#endif
+}
+
+fxtCompressTexFuncExt
+TxLoadLib::getfxtCompressTexFuncExt()
+{
+  return _tx_compress_fxt1; /* FXT1 encoder bound by the constructor; in a DXTN_DLL build it is only set if the library loaded */
+}
+
+dxtCompressTexFuncExt
+TxLoadLib::getdxtCompressTexFuncExt()
+{
+  return _tx_compress_dxtn; /* DXTn encoder bound by the constructor; in a DXTN_DLL build it is only set if the library loaded */
+}
+
+
+/*
+ * Utilities
+ ******************************************************************************/
+uint32
+TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
+{
+  /* Checksum of a whole texture stored in the given format, computed with
+   * zlib's crc32 starting from its standard initial value. A texture whose
+   * byte size comes out as 0 (e.g. unsupported format) hashes to 0.
+   * Adler32() is available as an alternative should crc32 need replacing. */
+  const int byteCount = sizeofTx(width, height, format);
+  if (!byteCount)
+    return 0;
+
+  return crc32(crc32(0L, Z_NULL, 0), src, byteCount);
+}
+
+int
+TxUtil::sizeofTx(int width, int height, uint16 format)
+{
+  /* Size in bytes of a width x height texture stored in `format`;
+   * 0 when the format is not recognised. */
+  switch (format) {
+  /* block-compressed formats: dimensions are first rounded up to the block size */
+  case GR_TEXFMT_ARGB_CMP_FXT1:   /* 8x4 blocks, 4 bits per texel */
+    return (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
+  case GR_TEXFMT_ARGB_CMP_DXT1:   /* 4x4 blocks, 4 bits per texel */
+    return (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
+  case GR_TEXFMT_ARGB_CMP_DXT3:   /* 4x4 blocks, 8 bits per texel */
+  case GR_TEXFMT_ARGB_CMP_DXT5:
+    return ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
+
+  /* one byte per texel */
+  case GR_TEXFMT_ALPHA_INTENSITY_44:
+  case GR_TEXFMT_ALPHA_8:
+  case GR_TEXFMT_INTENSITY_8:
+  case GR_TEXFMT_P_8:
+    return width * height;
+
+  /* two bytes per texel */
+  case GR_TEXFMT_ARGB_4444:
+  case GR_TEXFMT_ARGB_1555:
+  case GR_TEXFMT_RGB_565:
+  case GR_TEXFMT_ALPHA_INTENSITY_88:
+    return (width * height) << 1;
+
+  /* four bytes per texel */
+  case GR_TEXFMT_ARGB_8888:
+    return (width * height) << 2;
+  default:
+    break;
+  }
+
+  /* unsupported format */
+  DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
+  return 0;
+}
+
+#if 0 /* unused */
+uint32
+TxUtil::chkAlpha(uint32* src, int width, int height)
+{
+  /* Classifies the alpha channel of an ARGB8888 image (src). Compiled out by the surrounding #if 0.
+   * return values
+   * 0x00000000: 8bit alpha (some pixel has an alpha other than 0x00 or 0xff)
+   * 0x00000001: 1bit alpha (only 0x00 and 0xff occur, with at least one 0x00)
+   * 0xff000001: no alpha (every pixel has alpha 0xff)
+   */
+
+  int _size = width * height;
+  uint32 alpha = 0; /* stays 0 if the loop bails out early via "jmp done" */
+
+  __asm {
+    mov esi, dword ptr [src];
+    mov ecx, dword ptr [_size];
+    mov ebx, 0xff000000;        // running AND of every alpha byte seen so far
+
+  tc1_loop:
+    mov eax, dword ptr [esi];
+    add esi, 4;
+
+    and eax, 0xff000000;        // isolate the alpha byte
+    jz  alpha1bit;              // alpha 0x00
+    cmp eax, 0xff000000;
+    je  alpha1bit;              // alpha 0xff
+    jmp done;                   // anything else: leave alpha at 0 (8bit alpha)
+
+  alpha1bit:
+    and ebx, eax;
+    dec ecx;
+    jnz tc1_loop;
+
+    or  ebx, 0x00000001;        // low bit marks "scanned the whole image"
+    mov dword ptr [alpha], ebx;
+
+  done:
+  }
+
+  return alpha;
+}
+#endif
+
+uint32
+TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
+{
+  /* Texture checksum. Currently a thin wrapper over RiceCRC32(); Jabo MD5
+   * or any other custom checksum could be substituted here. A null source
+   * buffer hashes to 0.
+   * TODO: use *_HIRESTEXTURE option. */
+  if (src)
+    return RiceCRC32(src, width, height, size, rowStride);
+  return 0;
+}
+
+uint64
+TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
+{
+  /* 64-bit checksum for textures that may carry a palette.
+   * Rice CRC32 for now; Jabo MD5 or any other custom checksum could be
+   * used instead. TODO: use *_HIRESTEXTURE option.
+   *
+   * Result layout: high 32 bits = CRC of the palette up to the highest
+   * index reported by the colour-index scan, low 32 bits = texture CRC.
+   * If there is no palette, the low byte of size is neither 0 nor 1, the
+   * scan fails, or the combined value is 0, the result is the plain
+   * texture CRC in the low 32 bits. A null src yields 0. */
+
+  if (!src) return 0;
+
+  uint64 combined = 0;
+
+  if (palette) {
+    uint32 texCrc = 0, maxIndex = 0;
+    const int sizeCode = size & 0xff;
+    int paletteStride = 0; /* stays 0 unless a colour-index scan succeeded */
+
+    if (sizeCode == 1 && RiceCRC32_CI8(src, width, height, size, rowStride, &texCrc, &maxIndex))
+      paletteStride = 512;
+    else if (sizeCode == 0 && RiceCRC32_CI4(src, width, height, size, rowStride, &texCrc, &maxIndex))
+      paletteStride = 32;
+
+    if (paletteStride)
+      combined = ((uint64)RiceCRC32(palette, maxIndex + 1, 1, 2, paletteStride) << 32) | (uint64)texCrc;
+  }
+
+  if (!combined)
+    combined = (uint64)RiceCRC32(src, width, height, size, rowStride);
+
+  return combined;
+}
+
+/*
+** Computes Adler32 checksum for a stream of data.
+**
+** From the specification found in RFC 1950: (ZLIB Compressed Data Format
+** Specification version 3.3)
+**
+** ADLER32 (Adler-32 checksum) This contains a checksum value of the
+** uncompressed data (excluding any dictionary data) computed according to
+** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
+** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
+**
+** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
+** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
+** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
+** as s2*65536 + s1 in most-significant-byte first (network) order.
+**
+** 8.2. The Adler-32 algorithm 
+**
+** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
+** provides an extremely low probability of undetected errors.
+**
+** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
+** so the modulo operation time is negligible. If the bytes are a, b, c,
+** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
+** unlike the first sum, which is just a checksum. That 65521 is prime is
+** important to avoid a possible large class of two-byte errors that leave
+** the check unchanged. (The Fletcher checksum uses 255, which is not prime
+** and which also makes the Fletcher check insensitive to single byte
+** changes 0 <-> 255.)
+**
+** The sum s1 is initialized to 1 instead of zero to make the length of
+** the sequence part of s2, so that the length does not have to be checked
+** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
+*/
+
+uint32
+TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
+{
+#if 1
+  /* delegate to zlib, continuing from the running checksum dwAdler32 */
+  return adler32(dwAdler32, data, Len);
+#else
+  /* hand-rolled equivalent, currently compiled out */
+  const int kChunk = 5552;      /* largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+  const uint32 kBase = 65521;   /* largest prime smaller than 65536 */
+  uint32 low = dwAdler32 & 0xFFFF;
+  uint32 high = (dwAdler32 >> 16) & 0xFFFF;
+
+  for (int remaining = Len; remaining > 0; ) {
+    const int run = (remaining < kChunk ? remaining : kChunk);
+    remaining -= run;
+    for (int i = 0; i < run; i++) {
+      low += data[i];
+      high += low;
+    }
+    data += run;
+    low %= kBase;
+    high %= kBase;
+  }
+  return (high << 16) | low;
+#endif
+}
+
+uint32
+TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
+{
+  /* Adler-32 over a 2D region: each row contributes width * size bytes and
+   * consecutive rows start rowStride bytes apart. The running value starts at 1. */
+  const uint32 rowBytes = width * size;
+  const uint8 *row = src;
+  uint32 adler = 1;
+
+  for (int rowsLeft = height; rowsLeft > 0; rowsLeft--, row += rowStride)
+    adler = Adler32(row, rowBytes, adler);
+
+  return adler;
+}
+
+/* Rice CRC32 for hires texture packs */
+/* NOTE: The following is used in Glide64 to calculate the CRC32
+ * for Rice hires texture packs.
+ *
+ * BYTE* addr = (BYTE*)(gfx.RDRAM +
+ *                     rdp.addr[rdp.tiles[tile].t_mem] +
+ *                     (rdp.tiles[tile].ul_t * bpl) +
+ *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
+ * RiceCRC32(addr,
+ *          rdp.tiles[tile].width,
+ *          rdp.tiles[tile].height,
+ *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
+ *          bpl);
+ */
+uint32
+TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
+{
+  /* Rice-compatible texture hash. Each row is read as little-endian 32-bit
+   * words from the last word down to offset 0; every word is XORed with its
+   * byte offset and added to the hash after the hash is rotated left by 4.
+   * NOTE: bytes_per_width must be equal or larger than 4 (not enforced). */
+  uint32 crc32Ret = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+  /*if (bytes_per_width < 4) return 0;*/
+  try {
+#ifdef __MSC__
+    __asm {
+      push ebx;
+      push esi;
+      push edi;
+
+      mov ecx, dword ptr [src];
+      mov eax, dword ptr [height];
+      mov edx, 0;
+      dec eax;
+
+    loop2:
+      mov ebx, dword ptr [bytes_per_width];
+      sub ebx, 4;
+
+    loop1:
+      mov esi, dword ptr [ecx+ebx];
+      xor esi, ebx;
+      rol edx, 4;
+      add edx, esi;
+      sub ebx, 4;
+      jge loop1;
+
+      xor esi, eax;
+      add edx, esi;
+      add ecx, dword ptr [rowStride];
+      dec eax;
+      jge loop2;
+
+      mov dword ptr [crc32Ret], edx;
+
+      pop edi;
+      pop esi;
+      pop ebx;
+    }
+#else
+    /* Portable equivalent of the MSVC assembly above. It replaces a GCC
+     * extended-asm block that changed eax/ecx/edx without listing them as
+     * clobbers, stored its result through an input operand, and moved the
+     * stack pointer (pushl) while "m" operands were still in use -- any of
+     * which lets the compiler generate wrong code -- and that only
+     * assembled for 32-bit x86. */
+    uint32 hash = 0;               /* edx in the assembly */
+    uint32 word = 0;               /* esi */
+    const uint8 *row = src;        /* ecx */
+    int rowsLeft = height - 1;     /* eax: counts down and is mixed into the hash */
+
+    /* Both loops are do/while on purpose: like the assembly they execute
+     * at least once, so the checksums keep matching the assembly even
+     * for degenerate sizes (which, as there, read outside the rows the
+     * caller described). */
+    do {
+      int offset = (int)bytes_per_width - 4;   /* ebx: byte offset of the current word */
+
+      do {
+        /* Build the word byte by byte: safe for unaligned rows and
+         * little-endian on any host, matching the x86 dword load. */
+        const uint8 *p = row + offset;
+        word = (uint32)p[0] | ((uint32)p[1] << 8) | ((uint32)p[2] << 16) | ((uint32)p[3] << 24);
+        word ^= (uint32)offset;
+        hash = (hash << 4) | (hash >> 28);      /* rol edx, 4 */
+        hash += word;
+        offset -= 4;
+      } while (offset >= 0);
+
+      /* end of row: fold the remaining row count into the last word */
+      word ^= (uint32)rowsLeft;
+      hash += word;
+      row += rowStride;
+      rowsLeft--;
+    } while (rowsLeft >= 0);
+
+    crc32Ret = hash;
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  return crc32Ret;
+}
+
+boolean
+TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax)
+{
+  /* Rice-style checksum plus the largest 4-bit value found in the data.
+   *
+   * src       : first byte of the first row
+   * width,size: bytes hashed per row is ((width << size) + 1) >> 1
+   * height    : number of rows
+   * rowStride : byte distance between the starts of consecutive rows
+   * crc32     : receives the 32-bit checksum
+   * cimax     : receives the maximum nibble seen (0..15)
+   * returns   : always 1
+   *
+   * Portable replacement for the former x86 inline assembly.  The GCC
+   * variant stored into input-only operands, modified eax/ecx/edx without
+   * declaring them and only assembled for 32-bit x86.  The arithmetic below
+   * mirrors the assembly, so results are unchanged.
+   */
+  /* NOTE: bytes_per_width must be equal or larger than 4 */
+
+  uint32 crc32Ret = 0;
+  uint32 cimaxRet = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/
+
+  /* 4bit CI */
+  try {
+    const uint8* row = src;
+    uint32 word = 0;
+    int y = height - 1;
+
+    /* Both loops are bottom-tested exactly like the assembly they replace,
+     * so each body runs at least once. */
+    do {
+      int x = (int)bytes_per_width - 4;
+
+      do {
+        /* little-endian 32-bit load, built bytewise so alignment and host
+         * byte order do not matter */
+        word = (uint32)row[x] |
+               ((uint32)row[x + 1] << 8) |
+               ((uint32)row[x + 2] << 16) |
+               ((uint32)row[x + 3] << 24);
+
+        /* scan the eight nibbles unless the maximum is already saturated */
+        if (cimaxRet != 0x0000000f) {
+          for (int shift = 0; shift < 32; shift += 4) {
+            const uint32 ci = (word >> shift) & 0x0000000f;
+            if (ci > cimaxRet) cimaxRet = ci;
+          }
+        }
+
+        word ^= (uint32)x;
+        /* rotate left by 4; assumes uint32 is exactly 32 bits wide */
+        crc32Ret = (crc32Ret << 4) | (crc32Ret >> 28);
+        crc32Ret += word;
+        x -= 4;
+      } while (x >= 0);
+
+      /* fold the row counter into the last word of the row */
+      word ^= (uint32)y;
+      crc32Ret += word;
+      row += rowStride;
+      --y;
+    } while (y >= 0);
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  *crc32 = crc32Ret;
+  *cimax = cimaxRet;
+
+  return 1;
+}
+
+boolean
+TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
+                      uint32* crc32, uint32* cimax)
+{
+  /* Rice-style checksum plus the largest byte value found in the data.
+   *
+   * src       : first byte of the first row
+   * width,size: bytes hashed per row is ((width << size) + 1) >> 1
+   * height    : number of rows
+   * rowStride : byte distance between the starts of consecutive rows
+   * crc32     : receives the 32-bit checksum
+   * cimax     : receives the maximum byte seen (0..255)
+   * returns   : always 1
+   *
+   * Portable replacement for the former x86 inline assembly.  The GCC
+   * variant stored into input-only operands, modified eax/ecx/edx without
+   * declaring them and only assembled for 32-bit x86.  The arithmetic below
+   * mirrors the assembly, so results are unchanged.
+   */
+  /* NOTE: bytes_per_width must be equal or larger than 4 */
+
+  uint32 crc32Ret = 0;
+  uint32 cimaxRet = 0;
+  const uint32 bytes_per_width = ((width << size) + 1) >> 1;
+
+  /*if (bytes_per_width < 4) return 0;*/
+
+  /* 8bit CI */
+  try {
+    const uint8* row = src;
+    uint32 word = 0;
+    int y = height - 1;
+
+    /* Both loops are bottom-tested exactly like the assembly they replace,
+     * so each body runs at least once. */
+    do {
+      int x = (int)bytes_per_width - 4;
+
+      do {
+        /* little-endian 32-bit load, built bytewise so alignment and host
+         * byte order do not matter */
+        word = (uint32)row[x] |
+               ((uint32)row[x + 1] << 8) |
+               ((uint32)row[x + 2] << 16) |
+               ((uint32)row[x + 3] << 24);
+
+        /* scan the four bytes unless the maximum is already saturated */
+        if (cimaxRet != 0x000000ff) {
+          for (int shift = 0; shift < 32; shift += 8) {
+            const uint32 ci = (word >> shift) & 0x000000ff;
+            if (ci > cimaxRet) cimaxRet = ci;
+          }
+        }
+
+        word ^= (uint32)x;
+        /* rotate left by 4; assumes uint32 is exactly 32 bits wide */
+        crc32Ret = (crc32Ret << 4) | (crc32Ret >> 28);
+        crc32Ret += word;
+        x -= 4;
+      } while (x >= 0);
+
+      /* fold the row counter into the last word of the row */
+      word ^= (uint32)y;
+      crc32Ret += word;
+      row += rowStride;
+      --y;
+    } while (y >= 0);
+  } catch(...) {
+    DBG_INFO(80, L"Error: RiceCRC32 exception!\n");
+  }
+
+  *crc32 = crc32Ret;
+  *cimax = cimaxRet;
+
+  return 1;
+}
+
+int
+TxUtil::log2(int num)
+{
+  /* Index of the highest set bit of num, i.e. floor(log2(num)) for
+   * positive values.  Returns 0 for num == 0.
+   *
+   * Portable replacement for the former x86 BSR inline assembly, whose GCC
+   * variant stored into an input-only operand and modified eax without
+   * declaring it.  num is examined as an unsigned bit pattern, as BSR did,
+   * so a negative argument still yields 31 with a 32-bit int.
+   */
+  unsigned int bits = (unsigned int)num;
+  int i = 0;
+
+  while (bits >>= 1)
+    i++;
+
+  return i;
+}
+
+int
+TxUtil::grLodLog2(int w, int h)
+{
+  /* log2 of the larger of the two dimensions */
+  const int larger = (w >= h) ? w : h;
+  return log2(larger);
+}
+
+int
+TxUtil::grAspectRatioLog2(int w, int h)
+{
+  /* log2 of the width:height ratio: positive when w is the larger side,
+   * negative when h is.
+   *
+   * A zero dimension used to reach the integer divisions below and fault;
+   * it is now reported as ratio 0.  Every input that previously returned a
+   * value still returns the same value.
+   */
+  if (w == 0 || h == 0) return 0;
+
+  return (w >= h ? log2(w/h) : -log2(h/w));
+}
+
+/* Report a processor count, clamped to MAX_NUMCORE.
+ *
+ * With __MSC__ defined the value is dwNumberOfProcessors from
+ * GetSystemInfo().  Otherwise it comes from CPUID leaf 1: when EDX bit 28
+ * (HTT) is set, bits 23:16 of EBX are used; when it is clear the default of
+ * 1 is kept.
+ */
+int
+TxUtil::getNumberofProcessors()
+{
+  int numcore = 1;
+
+  /* number of logical processors per physical processor */
+  try {
+#ifdef __MSC__
+#if 1
+    /* use win32 api */
+    SYSTEM_INFO siSysInfo;
+    ZeroMemory(&siSysInfo, sizeof(SYSTEM_INFO));
+    GetSystemInfo(&siSysInfo);
+    numcore = siSysInfo.dwNumberOfProcessors;
+#else
+    __asm {
+      push ebx;
+
+      mov eax, 1;
+      cpuid;
+      test edx, 0x10000000; /* check HTT */
+      jz uniproc;
+      and ebx, 0x00ff0000;  /* mask logical core counter bit */
+      shr ebx, 16;
+      mov dword ptr [numcore], ebx;
+    uniproc:
+
+      pop ebx;
+    }
+#endif
+#else
+    /* NOTE(review): numcore is listed as an input operand ("m") although the
+     * asm stores to it, and eax/ecx/edx (written by cpuid) are not in the
+     * clobber list.  pushl/popl also restrict this to 32-bit x86 and move
+     * the stack pointer while a memory operand is in use.  A portable query
+     * would need an extra header -- TODO confirm and replace. */
+    asm volatile(
+      "pushl %%ebx \n"
+
+      "movl $1, %%eax \n"
+      "cpuid \n"
+      "testl $0x10000000, %%edx \n"
+      "jz 0f \n"
+      "andl $0x00ff0000, %%ebx \n"
+      "shrl $16, %%ebx \n"
+      "movl %%ebx, %0 \n"
+      "0: \n"
+
+      "popl %%ebx \n"
+      :
+      : "m"(numcore)
+      : "memory", "cc"
+      );
+#endif
+  } catch(...) {
+    DBG_INFO(80, L"Error: number of processor detection failed!\n");
+  }
+
+  /* never report more cores than the library is prepared to use */
+  if (numcore > MAX_NUMCORE) numcore = MAX_NUMCORE;
+
+  DBG_INFO(80, L"Number of processors : %d\n", numcore);
+
+  return numcore;
+}
+
+
+/*
+ * Memory buffers for texture manipulations
+ ******************************************************************************/
+TxMemBuf::TxMemBuf()
+{
+  /* both slots start out empty; init() performs the allocation */
+  _tex[0] = _tex[1] = NULL;
+  _size[0] = _size[1] = 0;
+}
+
+/* Release any buffers that are still allocated. */
+TxMemBuf::~TxMemBuf()
+{
+  shutdown();
+}
+
+/* Allocate the two buffers, each maxwidth * maxheight * 4 bytes.
+ *
+ * A slot that already holds a buffer is left untouched, whatever its size.
+ * Returns 1 when both slots hold a buffer afterwards.  Returns 0, after
+ * releasing everything via shutdown(), when a needed allocation fails or
+ * the requested dimensions are non-positive or too large for the byte
+ * count to fit in a signed 32-bit value.
+ */
+boolean
+TxMemBuf::init(int maxwidth, int maxheight)
+{
+  int i;
+  for (i = 0; i < 2; i++) {
+    if (_tex[i]) continue; /* keep the existing buffer */
+
+    uint32 bytes = 0;
+    /* the division-based bound avoids overflowing the multiplication */
+    if (maxwidth > 0 && maxheight > 0 &&
+        (uint32)maxwidth <= (0x7fffffffu / 4) / (uint32)maxheight) {
+      bytes = (uint32)maxwidth * (uint32)maxheight * 4;
+      _tex[i] = (uint8 *)malloc(bytes);
+    }
+
+    if (!_tex[i]) {
+      shutdown();
+      return 0;
+    }
+
+    /* record the size only once the allocation is known to have succeeded */
+    _size[i] = bytes;
+  }
+  return 1;
+}
+
+void
+TxMemBuf::shutdown()
+{
+  /* release both slots and mark them empty */
+  for (int slot = 0; slot < 2; ++slot) {
+    free(_tex[slot]); /* free(NULL) is a no-op */
+    _tex[slot] = NULL;
+    _size[slot] = 0;
+  }
+}
+
+uint8*
+TxMemBuf::get(unsigned int num)
+{
+  /* only slots 0 and 1 exist; anything else yields NULL */
+  if (num >= 2)
+    return NULL;
+
+  return _tex[num];
+}
+
+uint32
+TxMemBuf::size_of(unsigned int num)
+{
+  /* only slots 0 and 1 exist; anything else reports size 0 */
+  if (num >= 2)
+    return 0;
+
+  return _size[num];
+}
diff --git a/GLideNHQ/TxUtil.h b/GLideNHQ/TxUtil.h
new file mode 100644
index 00000000..b89f660d
--- /dev/null
+++ b/GLideNHQ/TxUtil.h
@@ -0,0 +1,121 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __TXUTIL_H__
+#define __TXUTIL_H__
+
+/* maximum number of CPU cores allowed */
+#define MAX_NUMCORE 8
+
+#include "TxInternal.h"
+#include <string>
+
+/* Unless DXTN_DLL is defined, the two texture compressors are declared here
+ * with C linkage.  Their definitions are not part of this header. */
+#ifndef DXTN_DLL
+#ifdef __cplusplus
+extern "C"{
+#endif
+/* NOTE(review): presumably the DXTn (S3TC) encoder -- confirm against the
+ * implementation. */
+void tx_compress_dxtn(int srccomps, int width, int height,
+                      const void *source, int destformat, void *dest,
+                      int destRowStride);
+
+/* NOTE(review): presumably the FXT1 encoder -- confirm against the
+ * implementation. */
+int fxt1_encode(int width, int height, int comps,
+                const void *source, int srcRowStride,
+                void *dest, int destRowStride);
+#ifdef __cplusplus
+}
+#endif
+#endif /* DXTN_DLL */
+
+/* Pointer to a function with the same signature as tx_compress_dxtn(). */
+typedef void (*dxtCompressTexFuncExt)(int srccomps, int width,
+                                      int height, const void *srcPixData,
+                                      int destformat, void *dest,
+                                      int dstRowStride);
+
+/* Pointer to a function with the same signature as fxt1_encode(). */
+typedef int (*fxtCompressTexFuncExt)(int width, int height, int comps,
+                                     const void *source, int srcRowStride,
+                                     void *dest, int destRowStride);
+
+/* Holder for the two texture-compressor function pointers.
+ *
+ * The constructor is private; the only instance is the function-local
+ * static returned by getInstance().
+ */
+class TxLoadLib
+{
+private:
+#ifdef DXTN_DLL
+  /* NOTE(review): presumably the handle of the dynamically loaded DXTn
+   * library -- confirm in the constructor. */
+  HMODULE _dxtnlib;
+#endif
+  fxtCompressTexFuncExt _tx_compress_fxt1;
+  dxtCompressTexFuncExt _tx_compress_dxtn;
+  TxLoadLib();
+public:
+  /* Returns the single shared instance, created on first use. */
+  static TxLoadLib* getInstance() {
+    static TxLoadLib txLoadLib;
+    return &txLoadLib;
+  }
+  ~TxLoadLib();
+  /* Accessors for the two compressor function pointers. */
+  fxtCompressTexFuncExt getfxtCompressTexFuncExt();
+  dxtCompressTexFuncExt getdxtCompressTexFuncExt();
+};
+
+/* Stateless collection of texture utility routines: checksums, size and
+ * log2 helpers, and processor-count detection. */
+class TxUtil
+{
+private:
+  /* Adler-32 over Len bytes, continuing from the running value Adler. */
+  uint32 Adler32(const uint8* data, int Len, uint32 Adler);
+  /* Adler-32 over a 2D region: height rows of width * size bytes, with
+   * rows rowStride bytes apart. */
+  uint32 Adler32(const uint8* src, int width, int height, int size, int rowStride);
+  /* Rice-style checksum used for hires texture packs. */
+  uint32 RiceCRC32(const uint8* src, int width, int height, int size, int rowStride);
+  /* As RiceCRC32, additionally reporting the largest 4-bit value found;
+   * results are written through crc32 and cimax. */
+  boolean RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax);
+  /* As RiceCRC32, additionally reporting the largest 8-bit value found;
+   * results are written through crc32 and cimax. */
+  boolean RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
+                        uint32* crc32, uint32* cimax);
+  /* Index of the highest set bit of num; 0 when num is 0. */
+  int log2(int num);
+public:
+  TxUtil() { }
+  ~TxUtil() { }
+  int sizeofTx(int width, int height, uint16 format);
+  uint32 checksumTx(uint8 *data, int width, int height, uint16 format);
+#if 0 /* unused */
+  uint32 chkAlpha(uint32* src, int width, int height);
+#endif
+  uint32 checksum(uint8 *src, int width, int height, int size, int rowStride);
+  uint64 checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette);
+  /* log2 of the larger of w and h. */
+  int grLodLog2(int w, int h);
+  /* log2 of w/h when w >= h, otherwise the negated log2 of h/w. */
+  int grAspectRatioLog2(int w, int h);
+  /* Detected processor count, capped at MAX_NUMCORE. */
+  int getNumberofProcessors();
+};
+
+/* Owner of two heap buffers used for texture manipulation.
+ *
+ * The constructor is private; the only instance is the function-local
+ * static returned by getInstance().
+ */
+class TxMemBuf
+{
+private:
+  uint8 *_tex[2];   /* buffer pointers, NULL while unallocated */
+  uint32 _size[2];  /* size in bytes of each buffer, 0 while unallocated */
+  TxMemBuf();
+public:
+  /* Returns the single shared instance, created on first use. */
+  static TxMemBuf* getInstance() {
+    static TxMemBuf txMemBuf;
+    return &txMemBuf;
+  }
+  ~TxMemBuf();
+  /* Allocates maxwidth * maxheight * 4 bytes for each slot that is still
+   * empty; returns 0 after releasing everything if an allocation fails. */
+  boolean init(int maxwidth, int maxheight);
+  /* Frees both buffers and resets the slots. */
+  void shutdown(void);
+  /* Buffer pointer for slot num (0 or 1), NULL for any other index. */
+  uint8 *get(unsigned int num);
+  /* Buffer size in bytes for slot num (0 or 1), 0 for any other index. */
+  uint32 size_of(unsigned int num);
+};
+
+#endif /* __TXUTIL_H__ */
diff --git a/GLideNHQ/bldno.cpp b/GLideNHQ/bldno.cpp
new file mode 100644
index 00000000..f71eefc0
--- /dev/null
+++ b/GLideNHQ/bldno.cpp
@@ -0,0 +1,27 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+/* Print the BUILD_NUMBER / BUILD_NUMBER_STR macro definitions to stdout.
+ *
+ * The value comes from the BUILD_NUMBER environment variable when it is
+ * set.  Otherwise a 16-bit number is derived from the local time: day of
+ * year in the upper bits, then hour, then the quarter of the hour.
+ *
+ * Returns 0 on success and EXIT_FAILURE when the local time is needed but
+ * cannot be determined.
+ */
+int main (void)
+{
+  const char *build = getenv("BUILD_NUMBER");
+
+  if (build != NULL) {
+    printf("#define BUILD_NUMBER		%s\n", build);
+    printf("#define BUILD_NUMBER_STR	\"%s\"\n", build);
+  } else {
+    time_t sysTime;
+    const struct tm *locTime;
+    unsigned short magic;
+
+    /* the clock is only consulted when no explicit build number is given */
+    time(&sysTime);
+    locTime = localtime(&sysTime);
+    if (locTime == NULL) {
+      /* localtime() may fail; do not dereference a null result */
+      fprintf(stderr, "bldno: cannot determine local time\n");
+      return EXIT_FAILURE;
+    }
+
+    magic = (unsigned short)((locTime->tm_yday << 7) |
+                             (locTime->tm_hour << 2) |
+                             (locTime->tm_min / 15));
+    printf("#define BUILD_NUMBER		%d\n", magic);
+    printf("#define BUILD_NUMBER_STR	\"%d\"\n", magic);
+  }
+
+  return 0;
+}
diff --git a/GLideNHQ/bldno.h b/GLideNHQ/bldno.h
new file mode 100644
index 00000000..eb0ec01d
--- /dev/null
+++ b/GLideNHQ/bldno.h
@@ -0,0 +1,2 @@
+#define BUILD_NUMBER		13480
+#define BUILD_NUMBER_STR	"13480"
diff --git a/GLideNHQ/gpl-2.0.txt b/GLideNHQ/gpl-2.0.txt
new file mode 100644
index 00000000..d511905c
--- /dev/null
+++ b/GLideNHQ/gpl-2.0.txt
@@ -0,0 +1,339 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/GLideNHQ/inc/png.h b/GLideNHQ/inc/png.h
new file mode 100644
index 00000000..c48dcdb9
--- /dev/null
+++ b/GLideNHQ/inc/png.h
@@ -0,0 +1,2686 @@
+
+/* png.h - header file for PNG reference library
+ *
+ * libpng version 1.4.1 - February 25, 2010
+ * Copyright (c) 1998-2010 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This code is released under the libpng license (See LICENSE, below)
+ *
+ * Authors and maintainers:
+ *  libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
+ *  libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
+ *  libpng versions 0.97, January 1998, through 1.4.1 - February 25, 2010: Glenn
+ *  See also "Contributing Authors", below.
+ *
+ * Note about libpng version numbers:
+ *
+ *    Due to various miscommunications, unforeseen code incompatibilities
+ *    and occasional factors outside the authors' control, version numbering
+ *    on the library has not always been consistent and straightforward.
+ *    The following table summarizes matters since version 0.89c, which was
+ *    the first widely used release:
+ *
+ *    source                 png.h  png.h  shared-lib
+ *    version                string   int  version
+ *    -------                ------ -----  ----------
+ *    0.89c "1.0 beta 3"     0.89      89  1.0.89
+ *    0.90  "1.0 beta 4"     0.90      90  0.90  [should have been 2.0.90]
+ *    0.95  "1.0 beta 5"     0.95      95  0.95  [should have been 2.0.95]
+ *    0.96  "1.0 beta 6"     0.96      96  0.96  [should have been 2.0.96]
+ *    0.97b "1.00.97 beta 7" 1.00.97   97  1.0.1 [should have been 2.0.97]
+ *    0.97c                  0.97      97  2.0.97
+ *    0.98                   0.98      98  2.0.98
+ *    0.99                   0.99      98  2.0.99
+ *    0.99a-m                0.99      99  2.0.99
+ *    1.00                   1.00     100  2.1.0 [100 should be 10000]
+ *    1.0.0      (from here on, the   100  2.1.0 [100 should be 10000]
+ *    1.0.1       png.h string is   10001  2.1.0
+ *    1.0.1a-e    identical to the  10002  from here on, the shared library
+ *    1.0.2       source version)   10002  is 2.V where V is the source code
+ *    1.0.2a-b                      10003  version, except as noted.
+ *    1.0.3                         10003
+ *    1.0.3a-d                      10004
+ *    1.0.4                         10004
+ *    1.0.4a-f                      10005
+ *    1.0.5 (+ 2 patches)           10005
+ *    1.0.5a-d                      10006
+ *    1.0.5e-r                      10100 (not source compatible)
+ *    1.0.5s-v                      10006 (not binary compatible)
+ *    1.0.6 (+ 3 patches)           10006 (still binary incompatible)
+ *    1.0.6d-f                      10007 (still binary incompatible)
+ *    1.0.6g                        10007
+ *    1.0.6h                        10007  10.6h (testing xy.z so-numbering)
+ *    1.0.6i                        10007  10.6i
+ *    1.0.6j                        10007  2.1.0.6j (incompatible with 1.0.0)
+ *    1.0.7beta11-14        DLLNUM  10007  2.1.0.7beta11-14 (binary compatible)
+ *    1.0.7beta15-18           1    10007  2.1.0.7beta15-18 (binary compatible)
+ *    1.0.7rc1-2               1    10007  2.1.0.7rc1-2 (binary compatible)
+ *    1.0.7                    1    10007  (still compatible)
+ *    1.0.8beta1-4             1    10008  2.1.0.8beta1-4
+ *    1.0.8rc1                 1    10008  2.1.0.8rc1
+ *    1.0.8                    1    10008  2.1.0.8
+ *    1.0.9beta1-6             1    10009  2.1.0.9beta1-6
+ *    1.0.9rc1                 1    10009  2.1.0.9rc1
+ *    1.0.9beta7-10            1    10009  2.1.0.9beta7-10
+ *    1.0.9rc2                 1    10009  2.1.0.9rc2
+ *    1.0.9                    1    10009  2.1.0.9
+ *    1.0.10beta1              1    10010  2.1.0.10beta1
+ *    1.0.10rc1                1    10010  2.1.0.10rc1
+ *    1.0.10                   1    10010  2.1.0.10
+ *    1.0.11beta1-3            1    10011  2.1.0.11beta1-3
+ *    1.0.11rc1                1    10011  2.1.0.11rc1
+ *    1.0.11                   1    10011  2.1.0.11
+ *    1.0.12beta1-2            2    10012  2.1.0.12beta1-2
+ *    1.0.12rc1                2    10012  2.1.0.12rc1
+ *    1.0.12                   2    10012  2.1.0.12
+ *    1.1.0a-f                 -    10100  2.1.1.0a-f (branch abandoned)
+ *    1.2.0beta1-2             2    10200  2.1.2.0beta1-2
+ *    1.2.0beta3-5             3    10200  3.1.2.0beta3-5
+ *    1.2.0rc1                 3    10200  3.1.2.0rc1
+ *    1.2.0                    3    10200  3.1.2.0
+ *    1.2.1beta1-4             3    10201  3.1.2.1beta1-4
+ *    1.2.1rc1-2               3    10201  3.1.2.1rc1-2
+ *    1.2.1                    3    10201  3.1.2.1
+ *    1.2.2beta1-6            12    10202  12.so.0.1.2.2beta1-6
+ *    1.0.13beta1             10    10013  10.so.0.1.0.13beta1
+ *    1.0.13rc1               10    10013  10.so.0.1.0.13rc1
+ *    1.2.2rc1                12    10202  12.so.0.1.2.2rc1
+ *    1.0.13                  10    10013  10.so.0.1.0.13
+ *    1.2.2                   12    10202  12.so.0.1.2.2
+ *    1.2.3rc1-6              12    10203  12.so.0.1.2.3rc1-6
+ *    1.2.3                   12    10203  12.so.0.1.2.3
+ *    1.2.4beta1-3            13    10204  12.so.0.1.2.4beta1-3
+ *    1.0.14rc1               13    10014  10.so.0.1.0.14rc1
+ *    1.2.4rc1                13    10204  12.so.0.1.2.4rc1
+ *    1.0.14                  10    10014  10.so.0.1.0.14
+ *    1.2.4                   13    10204  12.so.0.1.2.4
+ *    1.2.5beta1-2            13    10205  12.so.0.1.2.5beta1-2
+ *    1.0.15rc1-3             10    10015  10.so.0.1.0.15rc1-3
+ *    1.2.5rc1-3              13    10205  12.so.0.1.2.5rc1-3
+ *    1.0.15                  10    10015  10.so.0.1.0.15
+ *    1.2.5                   13    10205  12.so.0.1.2.5
+ *    1.2.6beta1-4            13    10206  12.so.0.1.2.6beta1-4
+ *    1.0.16                  10    10016  10.so.0.1.0.16
+ *    1.2.6                   13    10206  12.so.0.1.2.6
+ *    1.2.7beta1-2            13    10207  12.so.0.1.2.7beta1-2
+ *    1.0.17rc1               10    10017  12.so.0.1.0.17rc1
+ *    1.2.7rc1                13    10207  12.so.0.1.2.7rc1
+ *    1.0.17                  10    10017  12.so.0.1.0.17
+ *    1.2.7                   13    10207  12.so.0.1.2.7
+ *    1.2.8beta1-5            13    10208  12.so.0.1.2.8beta1-5
+ *    1.0.18rc1-5             10    10018  12.so.0.1.0.18rc1-5
+ *    1.2.8rc1-5              13    10208  12.so.0.1.2.8rc1-5
+ *    1.0.18                  10    10018  12.so.0.1.0.18
+ *    1.2.8                   13    10208  12.so.0.1.2.8
+ *    1.2.9beta1-3            13    10209  12.so.0.1.2.9beta1-3
+ *    1.2.9beta4-11           13    10209  12.so.0.9[.0]
+ *    1.2.9rc1                13    10209  12.so.0.9[.0]
+ *    1.2.9                   13    10209  12.so.0.9[.0]
+ *    1.2.10beta1-7           13    10210  12.so.0.10[.0]
+ *    1.2.10rc1-2             13    10210  12.so.0.10[.0]
+ *    1.2.10                  13    10210  12.so.0.10[.0]
+ *    1.4.0beta1-5            14    10400  14.so.0.0[.0]
+ *    1.2.11beta1-4           13    10211  12.so.0.11[.0]
+ *    1.4.0beta7-8            14    10400  14.so.0.0[.0]
+ *    1.2.11                  13    10211  12.so.0.11[.0]
+ *    1.2.12                  13    10212  12.so.0.12[.0]
+ *    1.4.0beta9-14           14    10400  14.so.0.0[.0]
+ *    1.2.13                  13    10213  12.so.0.13[.0]
+ *    1.4.0beta15-36          14    10400  14.so.0.0[.0]
+ *    1.4.0beta37-87          14    10400  14.so.14.0[.0]
+ *    1.4.0rc01               14    10400  14.so.14.0[.0]
+ *    1.4.0beta88-109         14    10400  14.so.14.0[.0]
+ *    1.4.0rc02-08            14    10400  14.so.14.0[.0]
+ *    1.4.0                   14    10400  14.so.14.0[.0]
+ *    1.4.1beta01-03          14    10401  14.so.14.1[.0]
+ *    1.4.1rc01               14    10401  14.so.14.1[.0]
+ *    1.4.1beta04-12          14    10401  14.so.14.1[.0]
+ *    1.4.1rc02-04            14    10401  14.so.14.1[.0]
+ *    1.4.1                   14    10401  14.so.14.1[.0]
+ *
+ *    Henceforth the source version will match the shared-library major
+ *    and minor numbers; the shared-library major version number will be
+ *    used for changes in backward compatibility, as it is intended.  The
+ *    PNG_LIBPNG_VER macro, which is not used within libpng but is available
+ *    for applications, is an unsigned integer of the form xyyzz corresponding
+ *    to the source version x.y.z (leading zeros in y and z).  Beta versions
+ *    were given the previous public release number plus a letter, until
+ *    version 1.0.6j; from then on they were given the upcoming public
+ *    release number plus "betaNN" or "rcN".
+ *
+ *    Binary incompatibility exists only when applications make direct access
+ *    to the info_ptr or png_ptr members through png.h, and the compiled
+ *    application is loaded with a different version of the library.
+ *
+ *    DLLNUM will change each time there are forward or backward changes
+ *    in binary compatibility (e.g., when a new feature is added).
+ *
+ * See libpng.txt or libpng.3 for more information.  The PNG specification
+ * is available as a W3C Recommendation and as an ISO Specification,
+ * <http://www.w3.org/TR/2003/REC-PNG-20031110/>
+ */
+
+/*
+ * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE:
+ *
+ * If you modify libpng you may insert additional notices immediately following
+ * this sentence.
+ *
+ * This code is released under the libpng license.
+ *
+ * libpng versions 1.2.6, August 15, 2004, through 1.4.1, February 25, 2010, are
+ * Copyright (c) 2004, 2006-2010 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-1.2.5
+ * with the following individual added to the list of Contributing Authors:
+ *
+ *    Cosmin Truta
+ *
+ * libpng versions 1.0.7, July 1, 2000, through 1.2.5, October 3, 2002, are
+ * Copyright (c) 2000-2002 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-1.0.6
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    Simon-Pierre Cadieux
+ *    Eric S. Raymond
+ *    Gilles Vollant
+ *
+ * and with the following additions to the disclaimer:
+ *
+ *    There is no warranty against interference with your enjoyment of the
+ *    library or against infringement.  There is no warranty that our
+ *    efforts or the library will fulfill any of your particular purposes
+ *    or needs.  This library is provided with all faults, and the entire
+ *    risk of satisfactory quality, performance, accuracy, and effort is with
+ *    the user.
+ *
+ * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
+ * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-0.96,
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    Tom Lane
+ *    Glenn Randers-Pehrson
+ *    Willem van Schaik
+ *
+ * libpng versions 0.89, June 1996, through 0.96, May 1997, are
+ * Copyright (c) 1996, 1997 Andreas Dilger
+ * Distributed according to the same disclaimer and license as libpng-0.88,
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    John Bowler
+ *    Kevin Bracey
+ *    Sam Bushell
+ *    Magnus Holmgren
+ *    Greg Roelofs
+ *    Tom Tanner
+ *
+ * libpng versions 0.5, May 1995, through 0.88, January 1996, are
+ * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * For the purposes of this copyright and license, "Contributing Authors"
+ * is defined as the following set of individuals:
+ *
+ *    Andreas Dilger
+ *    Dave Martindale
+ *    Guy Eric Schalnat
+ *    Paul Schmidt
+ *    Tim Wegner
+ *
+ * The PNG Reference Library is supplied "AS IS".  The Contributing Authors
+ * and Group 42, Inc. disclaim all warranties, expressed or implied,
+ * including, without limitation, the warranties of merchantability and of
+ * fitness for any purpose.  The Contributing Authors and Group 42, Inc.
+ * assume no liability for direct, indirect, incidental, special, exemplary,
+ * or consequential damages, which may result from the use of the PNG
+ * Reference Library, even if advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * source code, or portions hereof, for any purpose, without fee, subject
+ * to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ *
+ * 2. Altered versions must be plainly marked as such and
+ * must not be misrepresented as being the original source.
+ *
+ * 3. This Copyright notice may not be removed or altered from
+ *    any source or altered source distribution.
+ *
+ * The Contributing Authors and Group 42, Inc. specifically permit, without
+ * fee, and encourage the use of this source code as a component to
+ * supporting the PNG file format in commercial products.  If you use this
+ * source code in a product, acknowledgment is not required but would be
+ * appreciated.
+ */
+
+/*
+ * A "png_get_copyright" function is available, for convenient use in "about"
+ * boxes and the like:
+ *
+ *     printf("%s",png_get_copyright(NULL));
+ *
+ * Also, the PNG logo (in PNG format, of course) is supplied in the
+ * files "pngbar.png" and "pngbar.jpg" (88x31) and "pngnow.png" (98x31).
+ */
+
+/*
+ * Libpng is OSI Certified Open Source Software.  OSI Certified is a
+ * certification mark of the Open Source Initiative.
+ */
+
+/*
+ * The contributing authors would like to thank all those who helped
+ * with testing, bug fixes, and patience.  This wouldn't have been
+ * possible without all of you.
+ *
+ * Thanks to Frank J. T. Wojcik for helping with the documentation.
+ */
+
+/*
+ * Y2K compliance in libpng:
+ * =========================
+ *
+ *    February 25, 2010
+ *
+ *    Since the PNG Development group is an ad-hoc body, we can't make
+ *    an official declaration.
+ *
+ *    This is your unofficial assurance that libpng from version 0.71 and
+ *    upward through 1.4.1 are Y2K compliant.  It is my belief that earlier
+ *    versions were also Y2K compliant.
+ *
+ *    Libpng only has three year fields.  One is a 2-byte unsigned integer
+ *    that will hold years up to 65535.  The other two hold the date in text
+ *    format, and will hold years up to 9999.
+ *
+ *    The integer is
+ *        "png_uint_16 year" in png_time_struct.
+ *
+ *    The strings are
+ *        "png_charp time_buffer" in png_struct and
+ *        "near_time_buffer", which is a local character string in png.c.
+ *
+ *    There are seven time-related functions:
+ *        png_convert_to_rfc_1123() in png.c
+ *          (formerly png_convert_to_rfc_1152() in error)
+ *        png_convert_from_struct_tm() in pngwrite.c, called in pngwrite.c
+ *        png_convert_from_time_t() in pngwrite.c
+ *        png_get_tIME() in pngget.c
+ *        png_handle_tIME() in pngrutil.c, called in pngread.c
+ *        png_set_tIME() in pngset.c
+ *        png_write_tIME() in pngwutil.c, called in pngwrite.c
+ *
+ *    All handle dates properly in a Y2K environment.  The
+ *    png_convert_from_time_t() function calls gmtime() to convert from system
+ *    clock time, which returns (year - 1900), which we properly convert to
+ *    the full 4-digit year.  There is a possibility that applications using
+ *    libpng are not passing 4-digit years into the png_convert_to_rfc_1123()
+ *    function, or that they are incorrectly passing only a 2-digit year
+ *    instead of "year - 1900" into the png_convert_from_struct_tm() function,
+ *    but this is not under our control.  The libpng documentation has always
+ *    stated that it works with 4-digit years, and the APIs have been
+ *    documented as such.
+ *
+ *    The tIME chunk itself is also Y2K compliant.  It uses a 2-byte unsigned
+ *    integer to hold the year, and can hold years as large as 65535.
+ *
+ *    zlib, upon which libpng depends, is also Y2K compliant.  It contains
+ *    no date-related code.
+ *
+ *       Glenn Randers-Pehrson
+ *       libpng maintainer
+ *       PNG Development Group
+ */
+
+#ifndef PNG_H
+#define PNG_H
+
+/* This is not the place to learn how to use libpng.  The file libpng.txt
+ * describes how to use libpng, and the file example.c summarizes it
+ * with some code on which to build.  This file is useful for looking
+ * at the actual function definitions and structure components.
+ */
+
+/* Version information for png.h - this should match the version in png.c */
+#define PNG_LIBPNG_VER_STRING "1.4.1"
+#define PNG_HEADER_VERSION_STRING \
+   " libpng version 1.4.1 - February 25, 2010\n"
+
+#define PNG_LIBPNG_VER_SONUM   14 /* cf. "14.so..." in the version table above */
+#define PNG_LIBPNG_VER_DLLNUM  14 /* "DLLNUM" column of the version table above */
+
+/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
+#define PNG_LIBPNG_VER_MAJOR   1
+#define PNG_LIBPNG_VER_MINOR   4
+#define PNG_LIBPNG_VER_RELEASE 1
+/* PNG_LIBPNG_VER_BUILD should match the numeric part of the final component
+ * of PNG_LIBPNG_VER_STRING, omitting any leading zero:
+ */
+
+#define PNG_LIBPNG_VER_BUILD  0
+
+/* Release Status */
+#define PNG_LIBPNG_BUILD_ALPHA    1
+#define PNG_LIBPNG_BUILD_BETA     2
+#define PNG_LIBPNG_BUILD_RC       3
+#define PNG_LIBPNG_BUILD_STABLE   4
+#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7 /* covers status values 1-4 */
+
+/* Release-Specific Flags */
+#define PNG_LIBPNG_BUILD_PATCH    8 /* Can be OR'ed with
+                                       PNG_LIBPNG_BUILD_STABLE only */
+#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_SPECIAL */
+#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_PRIVATE */
+
+#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_BETA /* NOTE(review): header says 1.4.1 final; confirm BETA (not STABLE) is intended */
+
+/* Careful here.  At one time, Guy wanted to use 082, but that would be octal.
+ * We must not include leading zeros.
+ * Versions 0.7 through 1.0.0 were in the range 0 to 100 here (only
+ * version 1.0.0 was mis-numbered 100 instead of 10000).  From
+ * version 1.0.1 it's xyyzz (decimal), where x=major, yy=minor, zz=release
+ */
+#define PNG_LIBPNG_VER 10401 /* 1.4.1 */
+
+#ifndef PNG_VERSION_INFO_ONLY
+/* Include the compression library's header */
+#include "zlib.h"
+#endif
+
+/* Include all user configurable info, including optional assembler routines */
+#include "pngconf.h"
+
+/* PNG_LIBPNG_BUILD_TYPE: base release status OR'ed with PRIVATE or SPECIAL.
+ * Added at libpng-1.2.8
+ *
+ * Ref MSDN: Private has priority over Special
+ * VS_FF_PRIVATEBUILD File *was not* built using standard release
+ * procedures. If this value is given, the StringFileInfo block must
+ * contain a PrivateBuild string.
+ *
+ * VS_FF_SPECIALBUILD File *was* built by the original company using
+ * standard release procedures but is a variation of the standard
+ * file of the same version number. If this value is given, the
+ * StringFileInfo block must contain a SpecialBuild string.
+ */
+
+#ifdef PNG_USER_PRIVATEBUILD
+#  define PNG_LIBPNG_BUILD_TYPE \
+          (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE)
+#else /* !PNG_USER_PRIVATEBUILD */
+#  ifdef PNG_LIBPNG_SPECIALBUILD
+#    define PNG_LIBPNG_BUILD_TYPE \
+            (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL)
+#  else /* neither private nor special: plain base type */
+#    define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE)
+#  endif /* PNG_LIBPNG_SPECIALBUILD */
+#endif /* PNG_USER_PRIVATEBUILD */
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* This file is arranged in several sections.  The first section contains
+ * structure and type definitions.  The second section contains the external
+ * library functions, while the third has the internal library functions,
+ * which applications aren't expected to use directly.
+ */
+
+/* Variables declared in png.c - only it needs to define PNG_NO_EXTERN */
+#if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN)
+/* Version information for C files: png_libpng_ver expands to a call to
+ * png_get_header_ver(NULL).  This had better match the version above.
+ */
+#define png_libpng_ver png_get_header_ver(NULL)
+
+#endif /* !PNG_NO_EXTERN || PNG_ALWAYS_EXTERN */
+
+/* Three color definitions.  The order of the red, green, and blue fields
+ * (and their exact size) is not important, although each field needs to
+ * be a png_byte or a png_uint_16 (as defined below).
+ */
+typedef struct png_color_struct
+{
+   png_byte red;
+   png_byte green;
+   png_byte blue;
+} png_color;
+typedef png_color FAR * png_colorp;        /* pointer to png_color */
+typedef png_color FAR * FAR * png_colorpp; /* pointer to png_colorp */
+
+typedef struct png_color_16_struct /* color with png_uint_16 samples */
+{
+   png_byte index;    /* used for palette files */
+   png_uint_16 red;   /* for use in red green blue files */
+   png_uint_16 green; /* for use in red green blue files */
+   png_uint_16 blue;  /* for use in red green blue files */
+   png_uint_16 gray;  /* for use in grayscale files */
+} png_color_16;
+typedef png_color_16 FAR * png_color_16p;        /* pointer to png_color_16 */
+typedef png_color_16 FAR * FAR * png_color_16pp; /* pointer to png_color_16p */
+
+typedef struct png_color_8_struct /* color with png_byte samples */
+{
+   png_byte red;   /* for use in red green blue files */
+   png_byte green; /* for use in red green blue files */
+   png_byte blue;  /* for use in red green blue files */
+   png_byte gray;  /* for use in grayscale files */
+   png_byte alpha; /* for alpha channel files */
+} png_color_8;
+typedef png_color_8 FAR * png_color_8p;        /* pointer to png_color_8 */
+typedef png_color_8 FAR * FAR * png_color_8pp; /* pointer to png_color_8p */
+
+/*
+ * The following two structures (png_sPLT_entry and png_sPLT_t) are used
+ * for the in-core representation of sPLT chunks.
+ */
+typedef struct png_sPLT_entry_struct /* one sPLT palette entry */
+{
+   png_uint_16 red;
+   png_uint_16 green;
+   png_uint_16 blue;
+   png_uint_16 alpha;
+   png_uint_16 frequency;
+} png_sPLT_entry;
+typedef png_sPLT_entry FAR * png_sPLT_entryp;        /* pointer to an entry */
+typedef png_sPLT_entry FAR * FAR * png_sPLT_entrypp; /* pointer to an entryp */
+
+/* When the depth of the sPLT palette is 8 bits, the color and alpha samples
+ * of each png_sPLT_entry occupy the LSB of their respective members, and the
+ * MSB of each is zero-filled.  The frequency member always uses all 16 bits.
+ */
+
+typedef struct png_sPLT_struct
+{
+   png_charp name;           /* palette name */
+   png_byte depth;           /* depth of palette samples */
+   png_sPLT_entryp entries;  /* palette entries */
+   png_int_32 nentries;      /* number of palette entries */
+} png_sPLT_t;
+typedef png_sPLT_t FAR * png_sPLT_tp;        /* pointer to png_sPLT_t */
+typedef png_sPLT_t FAR * FAR * png_sPLT_tpp; /* pointer to png_sPLT_tp */
+
+#ifdef PNG_TEXT_SUPPORTED
+/* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file,
+ * and whether that contents is compressed or not.  The "key" field
+ * points to a regular zero-terminated C string.  The "text", "lang", and
+ * "lang_key" fields can be regular C strings, empty strings, or NULL pointers.
+ * However, the structure returned by png_get_text() will always contain
+ * regular zero-terminated C strings (possibly empty), never NULL pointers,
+ * so they can be safely used in printf() and other string-handling functions.
+ */
+typedef struct png_text_struct
+{
+   int  compression;       /* compression value:
+                             -1: tEXt, none
+                              0: zTXt, deflate
+                              1: iTXt, none
+                              2: iTXt, deflate  */
+   png_charp key;          /* keyword, 1-79 character description of "text" */
+   png_charp text;         /* comment, may be an empty string (ie "")
+                              or a NULL pointer */
+   png_size_t text_length; /* length of the text string */
+#ifdef PNG_iTXt_SUPPORTED
+   png_size_t itxt_length; /* length of the itxt string */
+   png_charp lang;         /* language code, 0-79 characters
+                              or a NULL pointer */
+   png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
+                              chars or a NULL pointer */
+#endif /* PNG_iTXt_SUPPORTED */
+} png_text;
+typedef png_text FAR * png_textp;        /* pointer to png_text */
+typedef png_text FAR * FAR * png_textpp; /* pointer to png_textp */
+#endif /* PNG_TEXT_SUPPORTED */
+
+/* Supported compression types for text in PNG files (tEXt, zTXt, and iTXt).
+ * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */
+#define PNG_TEXT_COMPRESSION_NONE_WR -3
+#define PNG_TEXT_COMPRESSION_zTXt_WR -2
+#define PNG_TEXT_COMPRESSION_NONE    -1
+#define PNG_TEXT_COMPRESSION_zTXt     0
+#define PNG_ITXT_COMPRESSION_NONE     1
+#define PNG_ITXT_COMPRESSION_zTXt     2
+#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
+
+/* png_time is a way to hold the time in a machine-independent way.
+ * Two conversions are provided, both from time_t and struct tm.  There
+ * is no portable way to convert to either of these structures, as far
+ * as I know.  If you know of a portable way, send it to me.  As a side
+ * note - PNG has always been Year 2000 compliant!
+ */
+typedef struct png_time_struct
+{
+   png_uint_16 year; /* full year, as in, 1995 */
+   png_byte month;   /* month of year, 1 - 12 */
+   png_byte day;     /* day of month, 1 - 31 */
+   png_byte hour;    /* hour of day, 0 - 23 */
+   png_byte minute;  /* minute of hour, 0 - 59 */
+   png_byte second;  /* second of minute, 0 - 60 (for leap seconds) */
+} png_time;
+typedef png_time FAR * png_timep;        /* pointer to png_time */
+typedef png_time FAR * FAR * png_timepp; /* pointer to png_timep */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) || \
+ defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
+/* png_unknown_chunk is a structure to hold queued chunks for which there is
+ * no specific support.  The idea is that we can use this to queue
+ * up private chunks for output even though the library doesn't actually
+ * know about their semantics.
+ */
+typedef struct png_unknown_chunk_t
+{
+    png_byte name[5];  /* chunk name; presumably 4 chars + NUL -- confirm */
+    png_byte *data;    /* chunk data */
+    png_size_t size;   /* presumably length of data in bytes -- confirm */
+
+    /* libpng-using applications should NOT directly modify this byte. */
+    png_byte location; /* mode of operation at read time */
+}
+png_unknown_chunk;
+typedef png_unknown_chunk FAR * png_unknown_chunkp;
+typedef png_unknown_chunk FAR * FAR * png_unknown_chunkpp;
+#endif /* PNG_UNKNOWN_CHUNKS_SUPPORTED || PNG_HANDLE_AS_UNKNOWN_SUPPORTED */
+
+/* png_info is a structure that holds the information in a PNG file so
+ * that the application can find out the characteristics of the image.
+ * If you are reading the file, this structure will tell you what is
+ * in the PNG file.  If you are writing the file, fill in the information
+ * you want to put into the PNG file, then call png_write_info().
+ * The names chosen should be very close to the PNG specification, so
+ * consult that document for information about the meaning of each field.
+ *
+ * With libpng < 0.95, it was only possible to directly set and read the
+ * values in the png_info_struct, which meant that the contents and
+ * order of the values had to remain fixed.  With libpng 0.95 and later,
+ * however, there are now functions that abstract the contents of
+ * png_info_struct from the application, so this makes it easier to use
+ * libpng with dynamic libraries, and even makes it possible to use
+ * libraries that don't have all of the libpng ancillary chunk-handling
+ * functionality.
+ *
+ * In any case, the order of the parameters in png_info_struct should NOT
+ * be changed for as long as possible to keep compatibility with applications
+ * that use the old direct-access method with png_info_struct.
+ *
+ * The following members may have allocated storage attached that should be
+ * cleaned up before the structure is discarded: palette, trans, text,
+ * pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile,
+ * splt_palettes, scal_unit, row_pointers, and unknowns.   By default, these
+ * are automatically freed when the info structure is deallocated, if they were
+ * allocated internally by libpng.  This behavior can be changed by means
+ * of the png_data_freer() function.
+ *
+ * More allocation details: all the chunk-reading functions that
+ * change these members go through the corresponding png_set_*
+ * functions.  A function to clear these members is available: see
+ * png_free_data().  The png_set_* functions do not depend on being
+ * able to point info structure members to any of the storage they are
+ * passed (they make their own copies), EXCEPT that the png_set_text
+ * functions use the same storage passed to them in the text_ptr or
+ * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
+ * functions do not make their own copies.
+ */
+typedef struct png_info_struct
+{
+   /* the following are necessary for every PNG file */
+   png_uint_32 width PNG_DEPSTRUCT;  /* width of image in pixels (from IHDR) */
+   png_uint_32 height PNG_DEPSTRUCT; /* height of image in pixels (from IHDR) */
+   png_uint_32 valid PNG_DEPSTRUCT;  /* valid chunk data (see PNG_INFO_
+                                        below) */
+   png_size_t rowbytes PNG_DEPSTRUCT; /* bytes needed to hold an untransformed
+                                         row */
+   png_colorp palette PNG_DEPSTRUCT;      /* array of color values
+                                             (valid & PNG_INFO_PLTE) */
+   png_uint_16 num_palette PNG_DEPSTRUCT; /* number of color entries in
+                                             "palette" (PLTE) */
+   png_uint_16 num_trans PNG_DEPSTRUCT;   /* number of transparent palette
+                                             color (tRNS) */
+   png_byte bit_depth PNG_DEPSTRUCT;      /* 1, 2, 4, 8, or 16 bits/channel
+                                             (from IHDR) */
+   png_byte color_type PNG_DEPSTRUCT;     /* see PNG_COLOR_TYPE_ below
+                                             (from IHDR) */
+   /* The following three should have been named *_method not *_type */
+   png_byte compression_type PNG_DEPSTRUCT; /* must be
+                                             PNG_COMPRESSION_TYPE_BASE (IHDR) */
+   png_byte filter_type PNG_DEPSTRUCT;    /* must be PNG_FILTER_TYPE_BASE
+                                             (from IHDR) */
+   png_byte interlace_type PNG_DEPSTRUCT; /* One of PNG_INTERLACE_NONE,
+                                             PNG_INTERLACE_ADAM7 */
+
+   /* The following is informational only on read, and not used on writes. */
+   png_byte channels PNG_DEPSTRUCT;       /* number of data channels per
+                                             pixel (1, 2, 3, 4) */
+   png_byte pixel_depth PNG_DEPSTRUCT;    /* number of bits per pixel */
+   png_byte spare_byte PNG_DEPSTRUCT;     /* to align the data, and for
+                                             future use */
+   png_byte signature[8] PNG_DEPSTRUCT;   /* magic bytes read by libpng
+                                             from start of file */
+
+   /* The rest of the data is optional.  If you are reading, check the
+    * valid field to see if the information in these are valid.  If you
+    * are writing, set the valid field to those chunks you want written,
+    * and initialize the appropriate fields below.
+    */
+
+#if defined(PNG_gAMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+   /* The gAMA chunk describes the gamma characteristics of the system
+    * on which the image was created, normally in the range [1.0, 2.5].
+    * Data is valid if (valid & PNG_INFO_gAMA) is non-zero.
+    */
+   float gamma PNG_DEPSTRUCT; /* gamma value of image,
+                                 if (valid & PNG_INFO_gAMA) */
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+    /* GR-P, 0.96a */
+    /* Data valid if (valid & PNG_INFO_sRGB) non-zero. */
+   png_byte srgb_intent PNG_DEPSTRUCT; /* sRGB rendering intent
+                                          [0, 1, 2, or 3] */
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+   /* The tEXt, zTXt, and iTXt chunks contain human-readable textual data in
+    * uncompressed, compressed, and optionally compressed forms, respectively.
+    * The data in "text" is an array of pointers to uncompressed,
+    * null-terminated C strings. Each chunk has a keyword that describes the
+    * textual data contained in that chunk.  Keywords are not required to be
+    * unique, and the text string may be empty.  Any number of text chunks may
+    * be in an image.
+    */
+   int num_text PNG_DEPSTRUCT; /* number of comments read/to write */
+   int max_text PNG_DEPSTRUCT; /* current size of text array */
+   png_textp text PNG_DEPSTRUCT; /* array of comments read/to write */
+#endif /* PNG_TEXT_SUPPORTED */
+
+#ifdef PNG_tIME_SUPPORTED
+   /* The tIME chunk holds the last time the displayed image data was
+    * modified.  See the png_time struct for the contents of this struct.
+    */
+   png_time mod_time PNG_DEPSTRUCT;
+#endif
+
+#ifdef PNG_sBIT_SUPPORTED
+   /* The sBIT chunk specifies the number of significant high-order bits
+    * in the pixel data.  Values are in the range [1, bit_depth], and are
+    * only specified for the channels in the pixel data.  The contents of
+    * the low-order bits is not specified.  Data is valid if
+    * (valid & PNG_INFO_sBIT) is non-zero.
+    */
+   png_color_8 sig_bit PNG_DEPSTRUCT; /* significant bits in color channels */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \
+defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The tRNS chunk supplies transparency data for paletted images and
+    * other image types that don't need a full alpha channel.  There are
+    * "num_trans" transparency values for a paletted image, stored in the
+    * same order as the palette colors, starting from index 0.  Values
+    * for the data are in the range [0, 255], ranging from fully transparent
+    * to fully opaque, respectively.  For non-paletted images, there is a
+    * single color specified that should be treated as fully transparent.
+    * Data is valid if (valid & PNG_INFO_tRNS) is non-zero.
+    */
+   png_bytep trans_alpha PNG_DEPSTRUCT;    /* alpha values for paletted
+                                              image */
+   png_color_16 trans_color PNG_DEPSTRUCT; /* transparent color for
+                                              non-palette image */
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The bKGD chunk gives the suggested image background color if the
+    * display program does not have its own background color and the image
+    * needs to be composited onto a background before display.  The colors
+    * in "background" are normally in the same color space/depth as the
+    * pixel data.  Data is valid if (valid & PNG_INFO_bKGD) is non-zero.
+    */
+   png_color_16 background PNG_DEPSTRUCT;
+#endif
+
+#ifdef PNG_oFFs_SUPPORTED
+   /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards
+    * and downwards from the top-left corner of the display, page, or other
+    * application-specific co-ordinate space.  See the PNG_OFFSET_ defines
+    * below for the unit types.  Valid if (valid & PNG_INFO_oFFs) non-zero.
+    */
+   png_int_32 x_offset PNG_DEPSTRUCT; /* x offset on page */
+   png_int_32 y_offset PNG_DEPSTRUCT; /* y offset on page */
+   png_byte offset_unit_type PNG_DEPSTRUCT; /* offset units type */
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+   /* The pHYs chunk gives the physical pixel density of the image for
+    * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_
+    * defines below).  Data is valid if (valid & PNG_INFO_pHYs) is non-zero.
+    */
+   png_uint_32 x_pixels_per_unit PNG_DEPSTRUCT; /* horizontal pixel density */
+   png_uint_32 y_pixels_per_unit PNG_DEPSTRUCT; /* vertical pixel density */
+   png_byte phys_unit_type PNG_DEPSTRUCT; /* resolution type (see
+                                             PNG_RESOLUTION_ below) */
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+   /* The hIST chunk contains the relative frequency or importance of the
+    * various palette entries, so that a viewer can intelligently select a
+    * reduced-color palette, if required.  Data is an array of "num_palette"
+    * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST)
+    * is non-zero.
+    */
+   png_uint_16p hist PNG_DEPSTRUCT;
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+   /* The cHRM chunk describes the CIE color characteristics of the monitor
+    * on which the PNG was created.  This data allows the viewer to do gamut
+    * mapping of the input image to ensure that the viewer sees the same
+    * colors in the image as the creator.  Values are in the range
+    * [0.0, 0.8].  Data valid if (valid & PNG_INFO_cHRM) non-zero.
+    */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float x_white PNG_DEPSTRUCT;
+   float y_white PNG_DEPSTRUCT;
+   float x_red PNG_DEPSTRUCT;
+   float y_red PNG_DEPSTRUCT;
+   float x_green PNG_DEPSTRUCT;
+   float y_green PNG_DEPSTRUCT;
+   float x_blue PNG_DEPSTRUCT;
+   float y_blue PNG_DEPSTRUCT;
+#endif
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+   /* The pCAL chunk describes a transformation between the stored pixel
+    * values and original physical data values used to create the image.
+    * The integer range [0, 2^bit_depth - 1] maps to the floating-point
+    * range given by [pcal_X0, pcal_X1], and are further transformed by a
+    * (possibly non-linear) transformation function given by "pcal_type"
+    * and "pcal_params" into "pcal_units".  Please see the PNG_EQUATION_
+    * defines below, and the PNG-Group's PNG extensions document for a
+    * complete description of the transformations and how they should be
+    * implemented, and for a description of the ASCII parameter strings.
+    * Data values are valid if (valid & PNG_INFO_pCAL) non-zero.
+    */
+   png_charp pcal_purpose PNG_DEPSTRUCT;  /* pCAL chunk description string */
+   png_int_32 pcal_X0 PNG_DEPSTRUCT;      /* minimum value */
+   png_int_32 pcal_X1 PNG_DEPSTRUCT;      /* maximum value */
+   png_charp pcal_units PNG_DEPSTRUCT;    /* Latin-1 string giving physical
+                                             units */
+   png_charpp pcal_params PNG_DEPSTRUCT;  /* ASCII strings containing
+                                             parameter values */
+   png_byte pcal_type PNG_DEPSTRUCT;      /* equation type
+                                             (see PNG_EQUATION_ below) */
+   png_byte pcal_nparams PNG_DEPSTRUCT;   /* number of parameters given
+                                             in pcal_params */
+#endif
+
+/* New members added in libpng-1.0.6 */
+   png_uint_32 free_me PNG_DEPSTRUCT;     /* flags items libpng is
+                                             responsible for freeing */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) || \
+ defined(PNG_HANDLE_AS_UNKNOWN_SUPPORTED)
+   /* Storage for unknown chunks that the library doesn't recognize. */
+   png_unknown_chunkp unknown_chunks PNG_DEPSTRUCT;
+   png_size_t unknown_chunks_num PNG_DEPSTRUCT;
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+   /* iCCP chunk data. */
+   png_charp iccp_name PNG_DEPSTRUCT;     /* profile name */
+   png_charp iccp_profile PNG_DEPSTRUCT;  /* International Color Consortium
+                                             profile data */
+                            /* Note to maintainer: should be png_bytep */
+   png_uint_32 iccp_proflen PNG_DEPSTRUCT;  /* ICC profile data length */
+   png_byte iccp_compression PNG_DEPSTRUCT; /* Always zero */
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+   /* Data on sPLT chunks (there may be more than one). */
+   png_sPLT_tp splt_palettes PNG_DEPSTRUCT;
+   png_uint_32 splt_palettes_num PNG_DEPSTRUCT;
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+   /* The sCAL chunk describes the actual physical dimensions of the
+    * subject matter of the graphic.  The chunk contains a unit specification
+    * (a byte value) and two ASCII strings representing floating-point
+    * values.  The values are width and height corresponding to one pixel
+    * in the image.  This external representation is converted to double
+    * here.  Data values are valid if (valid & PNG_INFO_sCAL) is non-zero.
+    */
+   png_byte scal_unit PNG_DEPSTRUCT;         /* unit of physical scale */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   double scal_pixel_width PNG_DEPSTRUCT;    /* width of one pixel */
+   double scal_pixel_height PNG_DEPSTRUCT;   /* height of one pixel */
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp scal_s_width PNG_DEPSTRUCT;     /* string containing width */
+   png_charp scal_s_height PNG_DEPSTRUCT;    /* string containing height */
+#endif
+#endif
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+   /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS)
+      non-zero */
+   /* Data valid if (valid & PNG_INFO_IDAT) non-zero */
+   png_bytepp row_pointers PNG_DEPSTRUCT;        /* the image bits */
+#endif
+
+#if defined(PNG_FIXED_POINT_SUPPORTED) && defined(PNG_gAMA_SUPPORTED)
+   png_fixed_point int_gamma PNG_DEPSTRUCT; /* gamma of image,
+                                               if (valid & PNG_INFO_gAMA) */
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED) && defined(PNG_FIXED_POINT_SUPPORTED)
+   png_fixed_point int_x_white PNG_DEPSTRUCT;
+   png_fixed_point int_y_white PNG_DEPSTRUCT;
+   png_fixed_point int_x_red PNG_DEPSTRUCT;
+   png_fixed_point int_y_red PNG_DEPSTRUCT;
+   png_fixed_point int_x_green PNG_DEPSTRUCT;
+   png_fixed_point int_y_green PNG_DEPSTRUCT;
+   png_fixed_point int_x_blue PNG_DEPSTRUCT;
+   png_fixed_point int_y_blue PNG_DEPSTRUCT;
+#endif
+
+} png_info;
+
+typedef png_info FAR * png_infop;
+typedef png_info FAR * FAR * png_infopp;
+
+/* Maximum positive integer used in PNG is (2^31)-1 */
+#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL)
+#define PNG_UINT_32_MAX ((png_uint_32)(-1)) /* -1 cast: all bits set */
+#define PNG_SIZE_MAX ((png_size_t)(-1))     /* -1 cast: all bits set */
+
+/* These describe the color_type field in png_info. */
+/* color type masks */
+#define PNG_COLOR_MASK_PALETTE    1
+#define PNG_COLOR_MASK_COLOR      2
+#define PNG_COLOR_MASK_ALPHA      4
+
+/* color types (built from the masks above); not all combinations are legal */
+#define PNG_COLOR_TYPE_GRAY 0 /* no mask bits set */
+#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
+#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
+#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
+#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
+/* aliases */
+#define PNG_COLOR_TYPE_RGBA  PNG_COLOR_TYPE_RGB_ALPHA
+#define PNG_COLOR_TYPE_GA  PNG_COLOR_TYPE_GRAY_ALPHA
+
+/* This is for compression type. PNG 1.0-1.2 define only a single type. */
+#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */
+#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE
+
+/* This is for filter type. PNG 1.0-1.2 define only a single type. */
+#define PNG_FILTER_TYPE_BASE      0 /* Single row per-byte filtering */
+#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */
+#define PNG_FILTER_TYPE_DEFAULT   PNG_FILTER_TYPE_BASE
+
+/* These are for the interlacing type.  These values should NOT be changed. */
+#define PNG_INTERLACE_NONE        0 /* Non-interlaced image */
+#define PNG_INTERLACE_ADAM7       1 /* Adam7 interlacing */
+#define PNG_INTERLACE_LAST        2 /* Not a valid value */
+
+/* These are for the oFFs chunk.  These values should NOT be changed. */
+#define PNG_OFFSET_PIXEL          0 /* Offset in pixels */
+#define PNG_OFFSET_MICROMETER     1 /* Offset in micrometers (1/10^6 meter) */
+#define PNG_OFFSET_LAST           2 /* Not a valid value */
+
+/* These are for the pCAL chunk.  These values should NOT be changed. */
+#define PNG_EQUATION_LINEAR       0 /* Linear transformation */
+#define PNG_EQUATION_BASE_E       1 /* Exponential base e transform */
+#define PNG_EQUATION_ARBITRARY    2 /* Arbitrary base exponential transform */
+#define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
+#define PNG_EQUATION_LAST         4 /* Not a valid value */
+
+/* These are for the sCAL chunk.  These values should NOT be changed. */
+#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
+#define PNG_SCALE_METER           1 /* meters per pixel */
+#define PNG_SCALE_RADIAN          2 /* radians per pixel */
+#define PNG_SCALE_LAST            3 /* Not a valid value */
+
+/* These are for the pHYs chunk.  These values should NOT be changed. */
+#define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
+#define PNG_RESOLUTION_METER      1 /* pixels/meter */
+#define PNG_RESOLUTION_LAST       2 /* Not a valid value */
+
+/* These are for the sRGB chunk.  These values should NOT be changed. */
+#define PNG_sRGB_INTENT_PERCEPTUAL 0
+#define PNG_sRGB_INTENT_RELATIVE   1
+#define PNG_sRGB_INTENT_SATURATION 2
+#define PNG_sRGB_INTENT_ABSOLUTE   3
+#define PNG_sRGB_INTENT_LAST       4 /* Not a valid value */
+
+/* This is for text chunks */
+#define PNG_KEYWORD_MAX_LENGTH     79
+
+/* Maximum number of entries in PLTE/sPLT/tRNS arrays */
+#define PNG_MAX_PALETTE_LENGTH    256
+
+/* These determine if a chunk's data has been successfully read from the
+ * PNG file, or if the application has filled in the corresponding data in
+ * the info_struct to be written into the output file.  Each flag is one bit
+ * of png_info.valid; the PNG_INFO_<chunk> values should NOT be changed.
+ */
+#define PNG_INFO_gAMA 0x0001
+#define PNG_INFO_sBIT 0x0002
+#define PNG_INFO_cHRM 0x0004
+#define PNG_INFO_PLTE 0x0008
+#define PNG_INFO_tRNS 0x0010
+#define PNG_INFO_bKGD 0x0020
+#define PNG_INFO_hIST 0x0040
+#define PNG_INFO_pHYs 0x0080
+#define PNG_INFO_oFFs 0x0100
+#define PNG_INFO_tIME 0x0200
+#define PNG_INFO_pCAL 0x0400
+#define PNG_INFO_sRGB 0x0800   /* GR-P, 0.96a */
+#define PNG_INFO_iCCP 0x1000   /* ESR, 1.0.6 */
+#define PNG_INFO_sPLT 0x2000   /* ESR, 1.0.6 */
+#define PNG_INFO_sCAL 0x4000   /* ESR, 1.0.6 */
+#define PNG_INFO_IDAT 0x8000L  /* ESR, 1.0.6 */
+
+/* This describes a single row of image data.  It is used by the
+ * transformation routines, as some of them change these values for the
+ * row.  It also should enable using the routines for other purposes.
+ */
+typedef struct png_row_info_struct
+{
+   png_uint_32 width; /* width of row */
+   png_size_t rowbytes; /* number of bytes in row */
+   png_byte color_type; /* color type of row */
+   png_byte bit_depth; /* bit depth of row */
+   png_byte channels; /* number of channels (1, 2, 3, or 4) */
+   png_byte pixel_depth; /* bits per pixel (bit_depth * channels) */
+} png_row_info;
+
+typedef png_row_info FAR * png_row_infop;
+typedef png_row_info FAR * FAR * png_row_infopp;
+
+/* These are the function types for the I/O functions and for the functions
+ * that allow the user to override the default I/O functions with their
+ * own.  The png_error_ptr type should match that of user-supplied warning
+ * and error functions, while the png_rw_ptr type should match that of the
+ * user read/write data functions.
+ */
+typedef struct png_struct_def png_struct;
+typedef png_struct FAR * png_structp;
+
+typedef void (PNGAPI *png_error_ptr) PNGARG((png_structp, png_const_charp));
+typedef void (PNGAPI *png_rw_ptr) PNGARG((png_structp, png_bytep, png_size_t));
+typedef void (PNGAPI *png_flush_ptr) PNGARG((png_structp));
+typedef void (PNGAPI *png_read_status_ptr) PNGARG((png_structp, png_uint_32,
+   int));
+typedef void (PNGAPI *png_write_status_ptr) PNGARG((png_structp, png_uint_32,
+   int));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+typedef void (PNGAPI *png_progressive_info_ptr) PNGARG((png_structp,
+   png_infop));
+typedef void (PNGAPI *png_progressive_end_ptr) PNGARG((png_structp, png_infop));
+typedef void (PNGAPI *png_progressive_row_ptr) PNGARG((png_structp, png_bytep,
+   png_uint_32, int));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+typedef void (PNGAPI *png_user_transform_ptr) PNGARG((png_structp,
+    png_row_infop, png_bytep));
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+typedef int (PNGAPI *png_user_chunk_ptr) PNGARG((png_structp,
+   png_unknown_chunkp));
+#endif
+#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
+typedef void (PNGAPI *png_unknown_chunk_ptr) PNGARG((png_structp));
+#endif
+#ifdef PNG_SETJMP_SUPPORTED
+/* This must match the declaration of longjmp() in <setjmp.h>, and the
+ * application must include <setjmp.h> before png.h to obtain the
+ * definition of jmp_buf.
+ */
+typedef void (PNGAPI *png_longjmp_ptr) PNGARG((jmp_buf, int));
+#endif
+
+/* Transform masks for the high-level interface */
+#define PNG_TRANSFORM_IDENTITY       0x0000    /* read and write */
+#define PNG_TRANSFORM_STRIP_16       0x0001    /* read only */
+#define PNG_TRANSFORM_STRIP_ALPHA    0x0002    /* read only */
+#define PNG_TRANSFORM_PACKING        0x0004    /* read and write */
+#define PNG_TRANSFORM_PACKSWAP       0x0008    /* read and write */
+#define PNG_TRANSFORM_EXPAND         0x0010    /* read only */
+#define PNG_TRANSFORM_INVERT_MONO    0x0020    /* read and write */
+#define PNG_TRANSFORM_SHIFT          0x0040    /* read and write */
+#define PNG_TRANSFORM_BGR            0x0080    /* read and write */
+#define PNG_TRANSFORM_SWAP_ALPHA     0x0100    /* read and write */
+#define PNG_TRANSFORM_SWAP_ENDIAN    0x0200    /* read and write */
+#define PNG_TRANSFORM_INVERT_ALPHA   0x0400    /* read and write */
+#define PNG_TRANSFORM_STRIP_FILLER   0x0800    /* write only */
+/* Added to libpng-1.2.34 */
+#define PNG_TRANSFORM_STRIP_FILLER_BEFORE PNG_TRANSFORM_STRIP_FILLER
+#define PNG_TRANSFORM_STRIP_FILLER_AFTER 0x1000 /* write only */
+/* Added to libpng-1.4.0 */
+#define PNG_TRANSFORM_GRAY_TO_RGB   0x2000      /* read only */
+
+/* Flags for MNG supported features */
+#define PNG_FLAG_MNG_EMPTY_PLTE     0x01
+#define PNG_FLAG_MNG_FILTER_64      0x04
+#define PNG_ALL_MNG_FEATURES        0x05 /* OR of the two flags above */
+
+typedef png_voidp (*png_malloc_ptr) PNGARG((png_structp, png_alloc_size_t));
+typedef void (*png_free_ptr) PNGARG((png_structp, png_voidp));
+
+/* The structure that holds the information to read and write PNG files.
+ * The only people who need to care about what is inside of this are the
+ * people who will be modifying the library for their own special needs.
+ * It should NOT be accessed directly by an application, except to store
+ * the jmp_buf.
+ */
+
+struct png_struct_def
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf jmpbuf PNG_DEPSTRUCT;            /* used in png_error */
+   png_longjmp_ptr longjmp_fn PNG_DEPSTRUCT;/* setjmp non-local goto
+                                               function. */
+#endif
+   png_error_ptr error_fn PNG_DEPSTRUCT;    /* function for printing
+                                               errors and aborting */
+   png_error_ptr warning_fn PNG_DEPSTRUCT;  /* function for printing
+                                               warnings */
+   png_voidp error_ptr PNG_DEPSTRUCT;       /* user supplied struct for
+                                               error functions */
+   png_rw_ptr write_data_fn PNG_DEPSTRUCT;  /* function for writing
+                                               output data */
+   png_rw_ptr read_data_fn PNG_DEPSTRUCT;   /* function for reading
+                                               input data */
+   png_voidp io_ptr PNG_DEPSTRUCT;          /* ptr to application struct
+                                               for I/O functions */
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+   png_user_transform_ptr read_user_transform_fn PNG_DEPSTRUCT; /* user read
+                                                                 transform */
+#endif
+
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+   png_user_transform_ptr write_user_transform_fn PNG_DEPSTRUCT; /* user write
+                                                                  transform */
+#endif
+
+/* These were added in libpng-1.0.2 */
+#ifdef PNG_USER_TRANSFORM_PTR_SUPPORTED
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   png_voidp user_transform_ptr PNG_DEPSTRUCT; /* user supplied struct
+                                                  for user transform */
+   png_byte user_transform_depth PNG_DEPSTRUCT;    /* bit depth of user
+                                                      transformed pixels */
+   png_byte user_transform_channels PNG_DEPSTRUCT; /* channels in user
+                                                      transformed pixels */
+#endif
+#endif
+
+   png_uint_32 mode PNG_DEPSTRUCT;          /* tells us where we are in
+                                               the PNG file */
+   png_uint_32 flags PNG_DEPSTRUCT;         /* flags indicating various
+                                               things to libpng */
+   png_uint_32 transformations PNG_DEPSTRUCT; /* which transformations
+                                                 to perform */
+
+   z_stream zstream PNG_DEPSTRUCT;          /* pointer to decompression
+                                               structure (below) */
+   png_bytep zbuf PNG_DEPSTRUCT;            /* buffer for zlib */
+   png_size_t zbuf_size PNG_DEPSTRUCT;      /* size of zbuf */
+   int zlib_level PNG_DEPSTRUCT;            /* holds zlib compression level */
+   int zlib_method PNG_DEPSTRUCT;           /* holds zlib compression method */
+   int zlib_window_bits PNG_DEPSTRUCT;      /* holds zlib compression window
+                                               bits */
+   int zlib_mem_level PNG_DEPSTRUCT;        /* holds zlib compression memory
+                                               level */
+   int zlib_strategy PNG_DEPSTRUCT;         /* holds zlib compression
+                                               strategy */
+
+   png_uint_32 width PNG_DEPSTRUCT;         /* width of image in pixels */
+   png_uint_32 height PNG_DEPSTRUCT;        /* height of image in pixels */
+   png_uint_32 num_rows PNG_DEPSTRUCT;      /* number of rows in current pass */
+   png_uint_32 usr_width PNG_DEPSTRUCT;     /* width of row at start of write */
+   png_size_t rowbytes PNG_DEPSTRUCT;       /* size of row in bytes */
+#if 0 /* Replaced with the following in libpng-1.4.1 */
+   png_size_t irowbytes PNG_DEPSTRUCT;
+#endif
+/* Added in libpng-1.4.1 */
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   /* Total memory that a zTXt, sPLT, iTXt, iCCP, or unknown chunk
+    * can occupy when decompressed.  0 means unlimited.
+    * We will change the typedef from png_size_t to png_alloc_size_t
+    * in libpng-1.6.0
+    */
+   png_alloc_size_t user_chunk_malloc_max PNG_DEPSTRUCT;
+#endif
+   png_uint_32 iwidth PNG_DEPSTRUCT;        /* width of current interlaced
+                                               row in pixels */
+   png_uint_32 row_number PNG_DEPSTRUCT;    /* current row in interlace pass */
+   png_bytep prev_row PNG_DEPSTRUCT;        /* buffer to save previous
+                                               (unfiltered) row */
+   png_bytep row_buf PNG_DEPSTRUCT;         /* buffer to save current
+                                               (unfiltered) row */
+   png_bytep sub_row PNG_DEPSTRUCT;         /* buffer to save "sub" row
+                                               when filtering */
+   png_bytep up_row PNG_DEPSTRUCT;          /* buffer to save "up" row
+                                               when filtering */
+   png_bytep avg_row PNG_DEPSTRUCT;         /* buffer to save "avg" row
+                                               when filtering */
+   png_bytep paeth_row PNG_DEPSTRUCT;       /* buffer to save "Paeth" row
+                                               when filtering */
+   png_row_info row_info PNG_DEPSTRUCT;     /* used for transformation
+                                               routines */
+
+   png_uint_32 idat_size PNG_DEPSTRUCT;     /* current IDAT size for read */
+   png_uint_32 crc PNG_DEPSTRUCT;           /* current chunk CRC value */
+   png_colorp palette PNG_DEPSTRUCT;        /* palette from the input file */
+   png_uint_16 num_palette PNG_DEPSTRUCT;   /* number of color entries in
+                                               palette */
+   png_uint_16 num_trans PNG_DEPSTRUCT;     /* number of transparency values */
+   png_byte chunk_name[5] PNG_DEPSTRUCT;    /* null-terminated name of current
+                                               chunk */
+   png_byte compression PNG_DEPSTRUCT;      /* file compression type
+                                               (always 0) */
+   png_byte filter PNG_DEPSTRUCT;           /* file filter type (always 0) */
+   png_byte interlaced PNG_DEPSTRUCT;       /* PNG_INTERLACE_NONE,
+                                               PNG_INTERLACE_ADAM7 */
+   png_byte pass PNG_DEPSTRUCT;             /* current interlace pass (0 - 6) */
+   png_byte do_filter PNG_DEPSTRUCT;        /* row filter flags (see
+                                               PNG_FILTER_ below ) */
+   png_byte color_type PNG_DEPSTRUCT;       /* color type of file */
+   png_byte bit_depth PNG_DEPSTRUCT;        /* bit depth of file */
+   png_byte usr_bit_depth PNG_DEPSTRUCT;    /* bit depth of users row */
+   png_byte pixel_depth PNG_DEPSTRUCT;      /* number of bits per pixel */
+   png_byte channels PNG_DEPSTRUCT;         /* number of channels in file */
+   png_byte usr_channels PNG_DEPSTRUCT;     /* channels at start of write */
+   png_byte sig_bytes PNG_DEPSTRUCT;        /* magic bytes read/written from
+                                               start of file */
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+   png_uint_16 filler PNG_DEPSTRUCT;           /* filler bytes for pixel
+                                                  expansion */
+#endif
+
+#ifdef PNG_bKGD_SUPPORTED
+   png_byte background_gamma_type PNG_DEPSTRUCT;
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+   float background_gamma PNG_DEPSTRUCT;
+#  endif
+   png_color_16 background PNG_DEPSTRUCT;   /* background color in
+                                               screen gamma space */
+#ifdef PNG_READ_GAMMA_SUPPORTED
+   png_color_16 background_1 PNG_DEPSTRUCT; /* background normalized
+                                               to gamma 1.0 */
+#endif
+#endif /* PNG_bKGD_SUPPORTED */
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+   png_flush_ptr output_flush_fn PNG_DEPSTRUCT; /* Function for flushing
+                                               output */
+   png_uint_32 flush_dist PNG_DEPSTRUCT;    /* how many rows apart to flush,
+                                               0 - no flush */
+   png_uint_32 flush_rows PNG_DEPSTRUCT;    /* number of rows written since
+                                               last flush */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   int gamma_shift PNG_DEPSTRUCT;      /* number of "insignificant" bits
+                                          16-bit gamma */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float gamma PNG_DEPSTRUCT;          /* file gamma value */
+   float screen_gamma PNG_DEPSTRUCT;   /* screen gamma value
+                                          (display_exponent) */
+#endif
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep gamma_table PNG_DEPSTRUCT;     /* gamma table for 8-bit
+                                               depth files */
+   png_bytep gamma_from_1 PNG_DEPSTRUCT;    /* converts from 1.0 to screen */
+   png_bytep gamma_to_1 PNG_DEPSTRUCT;      /* converts from file to 1.0 */
+   png_uint_16pp gamma_16_table PNG_DEPSTRUCT; /* gamma table for 16-bit
+                                                  depth files */
+   png_uint_16pp gamma_16_from_1 PNG_DEPSTRUCT; /* converts from 1.0 to
+                                                   screen */
+   png_uint_16pp gamma_16_to_1 PNG_DEPSTRUCT; /* converts from file to 1.0 */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED)
+   png_color_8 sig_bit PNG_DEPSTRUCT;       /* significant bits in each
+                                               available channel */
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+   png_color_8 shift PNG_DEPSTRUCT;         /* shift for significant bit
+                                               transformation */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \
+ || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep trans_alpha PNG_DEPSTRUCT;           /* alpha values for
+                                                     paletted files */
+   png_color_16 trans_color PNG_DEPSTRUCT;  /* transparent color for
+                                               non-paletted files */
+#endif
+
+   png_read_status_ptr read_row_fn PNG_DEPSTRUCT;   /* called after each
+                                                       row is decoded */
+   png_write_status_ptr write_row_fn PNG_DEPSTRUCT; /* called after each
+                                                       row is encoded */
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_progressive_info_ptr info_fn PNG_DEPSTRUCT; /* called after header
+                                                      data fully read */
+   png_progressive_row_ptr row_fn PNG_DEPSTRUCT;   /* called after each
+                                                      prog. row is decoded */
+   png_progressive_end_ptr end_fn PNG_DEPSTRUCT;   /* called after image
+                                                      is complete */
+   png_bytep save_buffer_ptr PNG_DEPSTRUCT;        /* current location in
+                                                      save_buffer */
+   png_bytep save_buffer PNG_DEPSTRUCT;            /* buffer for previously
+                                                      read data */
+   png_bytep current_buffer_ptr PNG_DEPSTRUCT;     /* current location in
+                                                      current_buffer */
+   png_bytep current_buffer PNG_DEPSTRUCT;         /* buffer for recently
+                                                      used data */
+   png_uint_32 push_length PNG_DEPSTRUCT;          /* size of current input
+                                                      chunk */
+   png_uint_32 skip_length PNG_DEPSTRUCT;          /* bytes to skip in
+                                                      input data */
+   png_size_t save_buffer_size PNG_DEPSTRUCT;      /* amount of data now
+                                                      in save_buffer */
+   png_size_t save_buffer_max PNG_DEPSTRUCT;       /* total size of
+                                                      save_buffer */
+   png_size_t buffer_size PNG_DEPSTRUCT;           /* total amount of
+                                                      available input data */
+   png_size_t current_buffer_size PNG_DEPSTRUCT;   /* amount of data now
+                                                      in current_buffer */
+   int process_mode PNG_DEPSTRUCT;                 /* what push library
+                                                      is currently doing */
+   int cur_palette PNG_DEPSTRUCT;                  /* current push library
+                                                      palette index */
+
+#  ifdef PNG_TEXT_SUPPORTED
+     png_size_t current_text_size PNG_DEPSTRUCT;   /* current size of
+                                                      text input data */
+     png_size_t current_text_left PNG_DEPSTRUCT;   /* how much text left
+                                                      to read in input */
+     png_charp current_text PNG_DEPSTRUCT;         /* current text chunk
+                                                      buffer */
+     png_charp current_text_ptr PNG_DEPSTRUCT;     /* current location
+                                                      in current_text */
+#  endif /* PNG_PROGRESSIVE_READ_SUPPORTED && PNG_TEXT_SUPPORTED */
+
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
+/* For the Borland special 64K segment handler */
+   png_bytepp offset_table_ptr PNG_DEPSTRUCT;
+   png_bytep offset_table PNG_DEPSTRUCT;
+   png_uint_16 offset_table_number PNG_DEPSTRUCT;
+   png_uint_16 offset_table_count PNG_DEPSTRUCT;
+   png_uint_16 offset_table_count_free PNG_DEPSTRUCT;
+#endif
+
+#ifdef PNG_READ_DITHER_SUPPORTED
+   png_bytep palette_lookup PNG_DEPSTRUCT; /* lookup table for dithering */
+   png_bytep dither_index PNG_DEPSTRUCT;   /* index translation for palette
+                                              files */
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED) || defined(PNG_hIST_SUPPORTED)
+   png_uint_16p hist PNG_DEPSTRUCT;                /* histogram */
+#endif
+
+#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
+   png_byte heuristic_method PNG_DEPSTRUCT;        /* heuristic for row
+                                                      filter selection */
+   png_byte num_prev_filters PNG_DEPSTRUCT;        /* number of weights
+                                                      for previous rows */
+   png_bytep prev_filters PNG_DEPSTRUCT;           /* filter type(s) of
+                                                      previous row(s) */
+   png_uint_16p filter_weights PNG_DEPSTRUCT;      /* weight(s) for previous
+                                                      line(s) */
+   png_uint_16p inv_filter_weights PNG_DEPSTRUCT;  /* 1/weight(s) for
+                                                      previous line(s) */
+   png_uint_16p filter_costs PNG_DEPSTRUCT;        /* relative filter
+                                                      calculation cost */
+   png_uint_16p inv_filter_costs PNG_DEPSTRUCT;    /* 1/relative filter
+                                                      calculation cost */
+#endif
+
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+   png_charp time_buffer PNG_DEPSTRUCT; /* String to hold RFC 1123 time text */
+#endif
+
+/* New members added in libpng-1.0.6 */
+
+   png_uint_32 free_me PNG_DEPSTRUCT;    /* flags items libpng is
+                                            responsible for freeing */
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+   png_voidp user_chunk_ptr PNG_DEPSTRUCT;
+   png_user_chunk_ptr read_user_chunk_fn PNG_DEPSTRUCT; /* user read
+                                                           chunk handler */
+#endif
+
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   int num_chunk_list PNG_DEPSTRUCT;
+   png_bytep chunk_list PNG_DEPSTRUCT;
+#endif
+
+/* New members added in libpng-1.0.3 */
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+   png_byte rgb_to_gray_status PNG_DEPSTRUCT;
+   /* These were changed from png_byte in libpng-1.0.6 */
+   png_uint_16 rgb_to_gray_red_coeff PNG_DEPSTRUCT;
+   png_uint_16 rgb_to_gray_green_coeff PNG_DEPSTRUCT;
+   png_uint_16 rgb_to_gray_blue_coeff PNG_DEPSTRUCT;
+#endif
+
+/* New member added in libpng-1.0.4 (renamed in 1.0.9) */
+#if defined(PNG_MNG_FEATURES_SUPPORTED) || \
+    defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+/* Changed from png_byte to png_uint_32 at version 1.2.0 */
+   png_uint_32 mng_features_permitted PNG_DEPSTRUCT;
+#endif
+
+/* New member added in libpng-1.0.7 */
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_fixed_point int_gamma PNG_DEPSTRUCT;
+#endif
+
+/* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+   png_byte filter_type PNG_DEPSTRUCT;
+#endif
+
+/* New members added in libpng-1.2.0 */
+
+/* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_voidp mem_ptr PNG_DEPSTRUCT;             /* user supplied struct for
+                                                   mem functions */
+   png_malloc_ptr malloc_fn PNG_DEPSTRUCT;      /* function for
+                                                   allocating memory */
+   png_free_ptr free_fn PNG_DEPSTRUCT;          /* function for
+                                                   freeing memory */
+#endif
+
+/* New member added in libpng-1.0.13 and 1.2.0 */
+   png_bytep big_row_buf PNG_DEPSTRUCT;         /* buffer to save current
+                                                   (unfiltered) row */
+
+#ifdef PNG_READ_DITHER_SUPPORTED
+/* The following three members were added at version 1.0.14 and 1.2.4 */
+   png_bytep dither_sort PNG_DEPSTRUCT;            /* working sort array */
+   png_bytep index_to_palette PNG_DEPSTRUCT;       /* where the original
+                                                     index currently is
+                                                     in the palette */
+   png_bytep palette_to_index PNG_DEPSTRUCT;       /* which original index
+                                                      points to this
+                                                      palette color */
+#endif
+
+/* New members added in libpng-1.0.16 and 1.2.6 */
+   png_byte compression_type PNG_DEPSTRUCT;
+
+#ifdef PNG_USER_LIMITS_SUPPORTED
+   png_uint_32 user_width_max PNG_DEPSTRUCT;
+   png_uint_32 user_height_max PNG_DEPSTRUCT;
+   /* Added in libpng-1.4.0: Total number of sPLT, text, and unknown
+    * chunks that can be stored (0 means unlimited).
+    */
+   png_uint_32 user_chunk_cache_max PNG_DEPSTRUCT;
+#endif
+
+/* New member added in libpng-1.0.25 and 1.2.17 */
+#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
+   /* Storage for unknown chunk that the library doesn't recognize. */
+   png_unknown_chunk unknown_chunk PNG_DEPSTRUCT;
+#endif
+
+/* New members added in libpng-1.2.26 */
+  png_uint_32 old_big_row_buf_size PNG_DEPSTRUCT;
+  png_uint_32 old_prev_row_size PNG_DEPSTRUCT;
+
+/* New member added in libpng-1.2.30 */
+  png_charp chunkdata PNG_DEPSTRUCT;  /* buffer for reading chunk data */
+
+#ifdef PNG_IO_STATE_SUPPORTED
+/* New member added in libpng-1.4.0 */
+   png_uint_32 io_state PNG_DEPSTRUCT;
+#endif
+};
+
+
+/* This triggers a compiler error in png.c, if png.c and png.h
+ * do not agree upon the version number.
+ */
+typedef png_structp version_1_4_1;
+
+typedef png_struct FAR * FAR * png_structpp;
+
+/* Here are the function definitions most commonly used.  This is not
+ * the place to find out how to use libpng.  See libpng.txt for the
+ * full explanation, see example.c for the summary.  This just provides
+ * a simple one line description of the use of each function.
+ */
+
+/* Returns the version number of the library */
+extern PNG_EXPORT(png_uint_32,png_access_version_number) PNGARG((void));
+
+/* Tell lib we have already handled the first <num_bytes> magic bytes.
+ * Handling more than 8 bytes from the beginning of the file is an error.
+ */
+extern PNG_EXPORT(void,png_set_sig_bytes) PNGARG((png_structp png_ptr,
+   int num_bytes));
+
+/* Check sig[start] through sig[start + num_to_check - 1] to see if it's a
+ * PNG file.  Returns zero if the supplied bytes match the 8-byte PNG
+ * signature, and non-zero otherwise.  Having num_to_check == 0 or
+ * start > 7 will always fail (i.e., return non-zero).
+ */
+extern PNG_EXPORT(int,png_sig_cmp) PNGARG((png_bytep sig, png_size_t start,
+   png_size_t num_to_check));
+
+/* Allocate and initialize png_ptr struct for reading, and any other memory. */
+extern PNG_EXPORT(png_structp,png_create_read_struct)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn)) PNG_ALLOCATED;
+
+/* Allocate and initialize png_ptr struct for writing, and any other memory */
+extern PNG_EXPORT(png_structp,png_create_write_struct)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn)) PNG_ALLOCATED;
+
+extern PNG_EXPORT(png_size_t,png_get_compression_buffer_size)
+   PNGARG((png_structp png_ptr));
+
+extern PNG_EXPORT(void,png_set_compression_buffer_size)
+   PNGARG((png_structp png_ptr, png_size_t size));
+
+/* Moved from pngconf.h in 1.4.0 and modified to ensure setjmp/longjmp
+ * match up.
+ */
+#ifdef PNG_SETJMP_SUPPORTED
+/* This function returns the jmp_buf built in to *png_ptr.  It must be
+ * supplied with an appropriate 'longjmp' function to use on that jmp_buf
+ * unless the default error function is overridden in which case NULL is
+ * acceptable.  The size of the jmp_buf is checked against the actual size
+ * allocated by the library - the call will return NULL on a mismatch
+ * indicating an ABI mismatch.
+ */
+extern PNG_EXPORT(jmp_buf*, png_set_longjmp_fn)
+   PNGARG((png_structp png_ptr, png_longjmp_ptr longjmp_fn, size_t
+       jmp_buf_size));
+#  define png_jmpbuf(png_ptr) \
+   (*png_set_longjmp_fn((png_ptr), longjmp, sizeof (jmp_buf)))
+#else /* !PNG_SETJMP_SUPPORTED */
+#  define png_jmpbuf(png_ptr) \
+   (LIBPNG_WAS_COMPILED_WITH__PNG_NO_SETJMP)
+#endif /* PNG_SETJMP_SUPPORTED */
+
+/* Reset the compression stream */
+extern PNG_EXPORT(int,png_reset_zstream) PNGARG((png_structp png_ptr));
+
+/* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */
+#ifdef PNG_USER_MEM_SUPPORTED
+extern PNG_EXPORT(png_structp,png_create_read_struct_2)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)) PNG_ALLOCATED;
+extern PNG_EXPORT(png_structp,png_create_write_struct_2)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)) PNG_ALLOCATED;
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+/* Write the PNG file signature. */
+extern PNG_EXPORT(void,png_write_sig) PNGARG((png_structp png_ptr));
+
+/* Write a PNG chunk - size, type, (optional) data, CRC. */
+extern PNG_EXPORT(void,png_write_chunk) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name, png_bytep data, png_size_t length));
+
+/* Write the start of a PNG chunk - length and chunk name. */
+extern PNG_EXPORT(void,png_write_chunk_start) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name, png_uint_32 length));
+
+/* Write the data of a PNG chunk started with png_write_chunk_start(). */
+extern PNG_EXPORT(void,png_write_chunk_data) PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+/* Finish a chunk started with png_write_chunk_start() (includes CRC). */
+extern PNG_EXPORT(void,png_write_chunk_end) PNGARG((png_structp png_ptr));
+
+/* Allocate and initialize the info structure */
+extern PNG_EXPORT(png_infop,png_create_info_struct)
+   PNGARG((png_structp png_ptr)) PNG_ALLOCATED;
+
+extern PNG_EXPORT(void,png_info_init_3) PNGARG((png_infopp info_ptr,
+    png_size_t png_info_struct_size));
+
+/* Writes all the PNG information before the image. */
+extern PNG_EXPORT(void,png_write_info_before_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+extern PNG_EXPORT(void,png_write_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the information before the actual image data. */
+extern PNG_EXPORT(void,png_read_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+#ifdef PNG_TIME_RFC1123_SUPPORTED
+extern PNG_EXPORT(png_charp,png_convert_to_rfc1123)
+   PNGARG((png_structp png_ptr, png_timep ptime));
+#endif
+
+#ifdef PNG_CONVERT_tIME_SUPPORTED
+/* Convert from a struct tm to png_time */
+extern PNG_EXPORT(void,png_convert_from_struct_tm) PNGARG((png_timep ptime,
+   struct tm FAR * ttime));
+
+/* Convert from time_t to png_time.  Uses gmtime() */
+extern PNG_EXPORT(void,png_convert_from_time_t) PNGARG((png_timep ptime,
+   time_t ttime));
+#endif /* PNG_CONVERT_tIME_SUPPORTED */
+
+#ifdef PNG_READ_EXPAND_SUPPORTED
+/* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */
+extern PNG_EXPORT(void,png_set_expand) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(void,png_set_expand_gray_1_2_4_to_8) PNGARG((png_structp
+  png_ptr));
+extern PNG_EXPORT(void,png_set_palette_to_rgb) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(void,png_set_tRNS_to_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* Use blue, green, red order for pixels. */
+extern PNG_EXPORT(void,png_set_bgr) PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_READ_GRAY_TO_RGB_SUPPORTED
+/* Expand the grayscale to 24-bit RGB if necessary. */
+extern PNG_EXPORT(void,png_set_gray_to_rgb) PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_READ_RGB_TO_GRAY_SUPPORTED
+/* Reduce RGB to grayscale (floating-point and fixed-point variants). */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_rgb_to_gray) PNGARG((png_structp png_ptr,
+   int error_action, double red, double green ));
+#endif /* PNG_FLOATING_POINT_SUPPORTED */
+extern PNG_EXPORT(void,png_set_rgb_to_gray_fixed) PNGARG((png_structp png_ptr,
+   int error_action, png_fixed_point red, png_fixed_point green ));
+extern PNG_EXPORT(png_byte,png_get_rgb_to_gray_status) PNGARG((png_structp
+   png_ptr));
+#endif /* PNG_READ_RGB_TO_GRAY_SUPPORTED */
+
+extern PNG_EXPORT(void,png_build_grayscale_palette) PNGARG((int bit_depth,
+   png_colorp palette));
+
+#ifdef PNG_READ_STRIP_ALPHA_SUPPORTED
+extern PNG_EXPORT(void,png_set_strip_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_swap_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_invert_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+/* Add a filler byte to 8-bit Gray or 24-bit RGB images. */
+extern PNG_EXPORT(void,png_set_filler) PNGARG((png_structp png_ptr,
+   png_uint_32 filler, int flags));
+/* The values of the PNG_FILLER_ defines should NOT be changed */
+#define PNG_FILLER_BEFORE 0
+#define PNG_FILLER_AFTER 1
+/* Add an alpha byte to 8-bit Gray or 24-bit RGB images. */
+extern PNG_EXPORT(void,png_set_add_alpha) PNGARG((png_structp png_ptr,
+   png_uint_32 filler, int flags));
+#endif /* PNG_READ_FILLER_SUPPORTED || PNG_WRITE_FILLER_SUPPORTED */
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Swap bytes in 16-bit depth files. */
+extern PNG_EXPORT(void,png_set_swap) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
+/* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */
+extern PNG_EXPORT(void,png_set_packing) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || \
+    defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+/* Swap packing order of pixels in bytes. */
+extern PNG_EXPORT(void,png_set_packswap) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Converts files to legal bit depths. */
+extern PNG_EXPORT(void,png_set_shift) PNGARG((png_structp png_ptr,
+   png_color_8p true_bits));
+#endif
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+    defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Have the code handle the interlacing.  Returns the number of passes. */
+extern PNG_EXPORT(int,png_set_interlace_handling) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+/* Invert monochrome files */
+extern PNG_EXPORT(void,png_set_invert_mono) PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_READ_BACKGROUND_SUPPORTED
+/* Handle alpha and tRNS by replacing with a background color. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_background) PNGARG((png_structp png_ptr,
+   png_color_16p background_color, int background_gamma_code,
+   int need_expand, double background_gamma));
+#endif /* PNG_FLOATING_POINT_SUPPORTED */
+#define PNG_BACKGROUND_GAMMA_UNKNOWN 0
+#define PNG_BACKGROUND_GAMMA_SCREEN  1
+#define PNG_BACKGROUND_GAMMA_FILE    2
+#define PNG_BACKGROUND_GAMMA_UNIQUE  3
+#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+
+#ifdef PNG_READ_16_TO_8_SUPPORTED
+/* Strip the second byte of information from a 16-bit depth file. */
+extern PNG_EXPORT(void,png_set_strip_16) PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_READ_DITHER_SUPPORTED
+/* Turn on dithering, and reduce the palette to the number of colors
+ * available.
+ */
+extern PNG_EXPORT(void,png_set_dither) PNGARG((png_structp png_ptr,
+   png_colorp palette, int num_palette, int maximum_colors,
+   png_uint_16p histogram, int full_dither));
+#endif
+
+#ifdef PNG_READ_GAMMA_SUPPORTED
+/* Handle gamma correction. Screen_gamma=(display_exponent) */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gamma) PNGARG((png_structp png_ptr,
+   double screen_gamma, double default_file_gamma));
+#endif /* PNG_FLOATING_POINT_SUPPORTED */
+#endif /* PNG_READ_GAMMA_SUPPORTED */
+
+
+#ifdef PNG_WRITE_FLUSH_SUPPORTED
+/* Set how many lines between output flushes - 0 for no flushing */
+extern PNG_EXPORT(void,png_set_flush) PNGARG((png_structp png_ptr, int nrows));
+/* Flush the current PNG output buffer */
+extern PNG_EXPORT(void,png_write_flush) PNGARG((png_structp png_ptr));
+#endif
+
+/* Optional update palette with requested transformations */
+extern PNG_EXPORT(void,png_start_read_image) PNGARG((png_structp png_ptr));
+
+/* Optional call to update the users info structure */
+extern PNG_EXPORT(void,png_read_update_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read one or more rows of image data. */
+extern PNG_EXPORT(void,png_read_rows) PNGARG((png_structp png_ptr,
+   png_bytepp row, png_bytepp display_row, png_uint_32 num_rows));
+#endif
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read a row of data. */
+extern PNG_EXPORT(void,png_read_row) PNGARG((png_structp png_ptr,
+   png_bytep row,
+   png_bytep display_row));
+#endif
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the whole image into memory at once. */
+extern PNG_EXPORT(void,png_read_image) PNGARG((png_structp png_ptr,
+   png_bytepp image));
+#endif
+
+/* Write a row of image data */
+extern PNG_EXPORT(void,png_write_row) PNGARG((png_structp png_ptr,
+   png_bytep row));
+
+/* Write a few rows of image data */
+extern PNG_EXPORT(void,png_write_rows) PNGARG((png_structp png_ptr,
+   png_bytepp row, png_uint_32 num_rows));
+
+/* Write the image data */
+extern PNG_EXPORT(void,png_write_image) PNGARG((png_structp png_ptr,
+   png_bytepp image));
+
+/* Write the end of the PNG file. */
+extern PNG_EXPORT(void,png_write_end) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
+/* Read the end of the PNG file. */
+extern PNG_EXPORT(void,png_read_end) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+/* Free any memory associated with the png_info_struct */
+extern PNG_EXPORT(void,png_destroy_info_struct) PNGARG((png_structp png_ptr,
+   png_infopp info_ptr_ptr));
+
+/* Free any memory associated with the png_struct and the png_info_structs */
+extern PNG_EXPORT(void,png_destroy_read_struct) PNGARG((png_structpp
+   png_ptr_ptr, png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr));
+
+/* Free any memory associated with the png_struct and the png_info_struct */
+extern PNG_EXPORT(void,png_destroy_write_struct)
+   PNGARG((png_structpp png_ptr_ptr, png_infopp info_ptr_ptr));
+
+/* Set the libpng method of handling chunk CRC errors */
+extern PNG_EXPORT(void,png_set_crc_action) PNGARG((png_structp png_ptr,
+   int crit_action, int ancil_action));
+
+/* Values for png_set_crc_action() to say how to handle CRC errors in
+ * ancillary and critical chunks, and whether to use the data contained
+ * therein.  Note that it is impossible to "discard" data in a critical
+ * chunk.  For versions prior to 0.90, the action was always error/quit,
+ * whereas in version 0.90 and later, the action for CRC errors in ancillary
+ * chunks is warn/discard.  These values should NOT be changed.
+ *
+ *      value                       action:critical     action:ancillary
+ */
+#define PNG_CRC_DEFAULT       0  /* error/quit          warn/discard data */
+#define PNG_CRC_ERROR_QUIT    1  /* error/quit          error/quit        */
+#define PNG_CRC_WARN_DISCARD  2  /* (INVALID)           warn/discard data */
+#define PNG_CRC_WARN_USE      3  /* warn/use data       warn/use data     */
+#define PNG_CRC_QUIET_USE     4  /* quiet/use data      quiet/use data    */
+#define PNG_CRC_NO_CHANGE     5  /* use current value   use current value */
+
+/* These functions give the user control over the scan-line filtering in
+ * libpng and the compression methods used by zlib.  These functions are
+ * mainly useful for testing, as the defaults should work with most users.
+ * Those users who are tight on memory or want faster performance at the
+ * expense of compression can modify them.  See the compression library
+ * header file (zlib.h) for an explanation of the compression functions.
+ */
+
+/* Set the filtering method(s) used by libpng.  Currently, the only valid
+ * value for "method" is 0.
+ */
+extern PNG_EXPORT(void,png_set_filter) PNGARG((png_structp png_ptr, int method,
+   int filters));
+
+/* Flags for png_set_filter() to say which filters to use.  The flags
+ * are chosen so that they don't conflict with real filter types
+ * below, in case they are supplied instead of the #defined constants.
+ * These values should NOT be changed.
+ */
+#define PNG_NO_FILTERS     0x00
+#define PNG_FILTER_NONE    0x08
+#define PNG_FILTER_SUB     0x10
+#define PNG_FILTER_UP      0x20
+#define PNG_FILTER_AVG     0x40
+#define PNG_FILTER_PAETH   0x80
+#define PNG_ALL_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP | \
+                         PNG_FILTER_AVG | PNG_FILTER_PAETH)
+
+/* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now.
+ * These defines should NOT be changed.
+ */
+#define PNG_FILTER_VALUE_NONE  0
+#define PNG_FILTER_VALUE_SUB   1
+#define PNG_FILTER_VALUE_UP    2
+#define PNG_FILTER_VALUE_AVG   3
+#define PNG_FILTER_VALUE_PAETH 4
+#define PNG_FILTER_VALUE_LAST  5
+
+#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED /* EXPERIMENTAL */
+/* The "heuristic_method" is given by one of the PNG_FILTER_HEURISTIC_
+ * defines, either the default (minimum-sum-of-absolute-differences), or
+ * the experimental method (weighted-minimum-sum-of-absolute-differences).
+ *
+ * Weights are factors >= 1.0, indicating how important it is to keep the
+ * filter type consistent between rows.  Larger numbers mean the current
+ * filter is that many times as likely to be the same as the "num_weights"
+ * previous filters.  This is cumulative for each previous row with a weight.
+ * There needs to be "num_weights" values in "filter_weights", or it can be
+ * NULL if the weights aren't being specified.  Weights have no influence on
+ * the selection of the first row filter.  Well chosen weights can (in theory)
+ * improve the compression for a given image.
+ *
+ * Costs are factors >= 1.0 indicating the relative decoding costs of a
+ * filter type.  Higher costs indicate more decoding expense, and are
+ * therefore less likely to be selected over a filter with lower computational
+ * costs.  There needs to be a value in "filter_costs" for each valid filter
+ * type (given by PNG_FILTER_VALUE_LAST), or it can be NULL if you aren't
+ * setting the costs.  Costs try to improve the speed of decompression without
+ * unduly increasing the compressed image size.
+ *
+ * A negative weight or cost indicates the default value is to be used, and
+ * values in the range [0.0, 1.0) indicate the value is to remain unchanged.
+ * The default values for both weights and costs are currently 1.0, but may
+ * change if good general weighting/cost heuristics can be found.  If both
+ * the weights and costs are set to 1.0, this degenerates the WEIGHTED method
+ * to the UNWEIGHTED method, but with added encoding time/computation.
+ */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_filter_heuristics) PNGARG((png_structp png_ptr,
+   int heuristic_method, int num_weights, png_doublep filter_weights,
+   png_doublep filter_costs));
+#endif /* PNG_FLOATING_POINT_SUPPORTED */
+#endif /*  PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
+
+/* Heuristic used for row filter selection.  These defines should NOT be
+ * changed.
+ */
+#define PNG_FILTER_HEURISTIC_DEFAULT    0  /* Currently "UNWEIGHTED" */
+#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1  /* Used by libpng < 0.95 */
+#define PNG_FILTER_HEURISTIC_WEIGHTED   2  /* Experimental feature */
+#define PNG_FILTER_HEURISTIC_LAST       3  /* Not a valid value */
+
+/* Set the library compression level.  Currently, valid values range from
+ * 0 - 9, corresponding directly to the zlib compression levels 0 - 9
+ * (0 - no compression, 9 - "maximal" compression).  Note that tests have
+ * shown that zlib compression levels 3-6 usually perform as well as level 9
+ * for PNG images, and do considerably fewer calculations.  In the future,
+ * these values may not correspond directly to the zlib compression levels.
+ */
+extern PNG_EXPORT(void,png_set_compression_level) PNGARG((png_structp png_ptr,
+   int level));
+
+extern PNG_EXPORT(void,png_set_compression_mem_level)
+   PNGARG((png_structp png_ptr, int mem_level));
+
+extern PNG_EXPORT(void,png_set_compression_strategy)
+   PNGARG((png_structp png_ptr, int strategy));
+
+extern PNG_EXPORT(void,png_set_compression_window_bits)
+   PNGARG((png_structp png_ptr, int window_bits));
+
+extern PNG_EXPORT(void,png_set_compression_method) PNGARG((png_structp png_ptr,
+   int method));
+
+/* These next functions are called for input/output, memory, and error
+ * handling.  They are in the file pngrio.c, pngwio.c, and pngerror.c,
+ * and call standard C I/O routines such as fread(), fwrite(), and
+ * fprintf().  These functions can be made to use other I/O routines
+ * at run time for those applications that need to handle I/O in a
+ * different manner by calling png_set_???_fn().  See libpng.txt for
+ * more information.
+ */
+
+#ifdef PNG_STDIO_SUPPORTED
+/* Initialize the input/output for the PNG file to the default functions. */
+extern PNG_EXPORT(void,png_init_io) PNGARG((png_structp png_ptr,
+    png_FILE_p fp));
+#endif
+
+/* Replace the (error and abort), and warning functions with user
+ * supplied functions.  If no messages are to be printed you must still
+ * write and use replacement functions. The replacement error_fn should
+ * still do a longjmp to the last setjmp location if you are using this
+ * method of error handling.  If error_fn or warning_fn is NULL, the
+ * default function will be used.
+ */
+
+extern PNG_EXPORT(void,png_set_error_fn) PNGARG((png_structp png_ptr,
+   png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn));
+
+/* Return the user pointer associated with the error functions */
+extern PNG_EXPORT(png_voidp,png_get_error_ptr) PNGARG((png_structp png_ptr));
+
+/* Replace the default data output functions with a user supplied one(s).
+ * If buffered output is not used, then output_flush_fn can be set to NULL.
+ * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time
+ * output_flush_fn will be ignored (and thus can be NULL).
+ * It is probably a mistake to use NULL for output_flush_fn if
+ * write_data_fn is not also NULL unless you have built libpng with
+ * PNG_WRITE_FLUSH_SUPPORTED undefined, because in this case libpng's
+ * default flush function, which uses the standard FILE structure, will
+ * be used.
+ */
+extern PNG_EXPORT(void,png_set_write_fn) PNGARG((png_structp png_ptr,
+   png_voidp io_ptr, png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn));
+
+/* Replace the default data input function with a user supplied one. */
+extern PNG_EXPORT(void,png_set_read_fn) PNGARG((png_structp png_ptr,
+   png_voidp io_ptr, png_rw_ptr read_data_fn));
+
+/* Return the user pointer associated with the I/O functions */
+extern PNG_EXPORT(png_voidp,png_get_io_ptr) PNGARG((png_structp png_ptr));
+
+extern PNG_EXPORT(void,png_set_read_status_fn) PNGARG((png_structp png_ptr,
+   png_read_status_ptr read_row_fn));
+
+extern PNG_EXPORT(void,png_set_write_status_fn) PNGARG((png_structp png_ptr,
+   png_write_status_ptr write_row_fn));
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* Replace the default memory allocation functions with user supplied one(s). */
+extern PNG_EXPORT(void,png_set_mem_fn) PNGARG((png_structp png_ptr,
+   png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+/* Return the user pointer associated with the memory functions */
+extern PNG_EXPORT(png_voidp,png_get_mem_ptr) PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_READ_USER_TRANSFORM_SUPPORTED
+extern PNG_EXPORT(void,png_set_read_user_transform_fn) PNGARG((png_structp
+   png_ptr, png_user_transform_ptr read_user_transform_fn));
+#endif
+
+#ifdef PNG_WRITE_USER_TRANSFORM_SUPPORTED
+extern PNG_EXPORT(void,png_set_write_user_transform_fn) PNGARG((png_structp
+   png_ptr, png_user_transform_ptr write_user_transform_fn));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+extern PNG_EXPORT(void,png_set_user_transform_info) PNGARG((png_structp
+   png_ptr, png_voidp user_transform_ptr, int user_transform_depth,
+   int user_transform_channels));
+/* Return the user pointer associated with the user transform functions */
+extern PNG_EXPORT(png_voidp,png_get_user_transform_ptr)
+   PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+extern PNG_EXPORT(void,png_set_read_user_chunk_fn) PNGARG((png_structp png_ptr,
+   png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn));
+extern PNG_EXPORT(png_voidp,png_get_user_chunk_ptr) PNGARG((png_structp
+   png_ptr));
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+/* Sets the function callbacks for the push reader, and a pointer to a
+ * user-defined structure available to the callback functions.
+ */
+extern PNG_EXPORT(void,png_set_progressive_read_fn) PNGARG((png_structp png_ptr,
+   png_voidp progressive_ptr,
+   png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
+   png_progressive_end_ptr end_fn));
+
+/* Returns the user pointer associated with the push read functions */
+extern PNG_EXPORT(png_voidp,png_get_progressive_ptr)
+   PNGARG((png_structp png_ptr));
+
+/* Function to be called when data becomes available */
+extern PNG_EXPORT(void,png_process_data) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep buffer, png_size_t buffer_size));
+
+/* Function that combines rows.  Not very much different from the
+ * png_combine_row() call.  Is this even used?
+ */
+extern PNG_EXPORT(void,png_progressive_combine_row) PNGARG((png_structp png_ptr,
+   png_bytep old_row, png_bytep new_row));
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+extern PNG_EXPORT(png_voidp,png_malloc) PNGARG((png_structp png_ptr,
+   png_alloc_size_t size)) PNG_ALLOCATED;
+/* Added at libpng version 1.4.0 */
+extern PNG_EXPORT(png_voidp,png_calloc) PNGARG((png_structp png_ptr,
+   png_alloc_size_t size)) PNG_ALLOCATED;
+
+/* Added at libpng version 1.2.4 */
+extern PNG_EXPORT(png_voidp,png_malloc_warn) PNGARG((png_structp png_ptr,
+   png_alloc_size_t size)) PNG_ALLOCATED;
+
+/* Frees a pointer allocated by png_malloc() */
+extern PNG_EXPORT(void,png_free) PNGARG((png_structp png_ptr, png_voidp ptr));
+
+/* Free data that was allocated internally */
+extern PNG_EXPORT(void,png_free_data) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 free_me, int num));
+/* Reassign responsibility for freeing existing data, whether allocated
+ * by libpng or by the application */
+extern PNG_EXPORT(void,png_data_freer) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int freer, png_uint_32 mask));
+/* Assignments for png_data_freer */
+#define PNG_DESTROY_WILL_FREE_DATA 1
+#define PNG_SET_WILL_FREE_DATA 1
+#define PNG_USER_WILL_FREE_DATA 2
+/* Flags for png_ptr->free_me and info_ptr->free_me */
+#define PNG_FREE_HIST 0x0008
+#define PNG_FREE_ICCP 0x0010
+#define PNG_FREE_SPLT 0x0020
+#define PNG_FREE_ROWS 0x0040
+#define PNG_FREE_PCAL 0x0080
+#define PNG_FREE_SCAL 0x0100
+#define PNG_FREE_UNKN 0x0200
+#define PNG_FREE_LIST 0x0400
+#define PNG_FREE_PLTE 0x1000
+#define PNG_FREE_TRNS 0x2000
+#define PNG_FREE_TEXT 0x4000
+#define PNG_FREE_ALL  0x7fff
+#define PNG_FREE_MUL  0x4220 /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */
+
+#ifdef PNG_USER_MEM_SUPPORTED
+extern PNG_EXPORT(png_voidp,png_malloc_default) PNGARG((png_structp png_ptr,
+   png_alloc_size_t size)) PNG_ALLOCATED;
+extern PNG_EXPORT(void,png_free_default) PNGARG((png_structp png_ptr,
+   png_voidp ptr));
+#endif
+
+#ifndef PNG_NO_ERROR_TEXT
+/* Fatal error in PNG image or libpng - can't continue */
+extern PNG_EXPORT(void,png_error) PNGARG((png_structp png_ptr,
+   png_const_charp error_message)) PNG_NORETURN;
+
+/* The same, but the chunk name is prepended to the error string. */
+extern PNG_EXPORT(void,png_chunk_error) PNGARG((png_structp png_ptr,
+   png_const_charp error_message)) PNG_NORETURN;
+
+#else
+/* Fatal error in PNG image or libpng - can't continue */
+extern PNG_EXPORT(void,png_err) PNGARG((png_structp png_ptr)) PNG_NORETURN;
+#endif
+
+/* Non-fatal error in libpng.  Can continue, but may have a problem. */
+extern PNG_EXPORT(void,png_warning) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+
+/* Non-fatal error in libpng, chunk name is prepended to message. */
+extern PNG_EXPORT(void,png_chunk_warning) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+
+#ifdef PNG_BENIGN_ERRORS_SUPPORTED
+/* Benign error in libpng.  Can continue, but may have a problem.
+ * User can choose whether to handle as a fatal error or as a warning. */
+extern PNG_EXPORT(void,png_benign_error) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+
+/* Same, chunk name is prepended to message. */
+extern PNG_EXPORT(void,png_chunk_benign_error) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+
+extern PNG_EXPORT(void,png_set_benign_errors) PNGARG((png_structp
+   png_ptr, int allowed));
+#endif
+
+/* The png_set_<chunk> functions are for storing values in the png_info_struct.
+ * Similarly, the png_get_<chunk> calls are used to read values from the
+ * png_info_struct, either storing the parameters in the passed variables, or
+ * setting pointers into the png_info_struct where the data is stored.  The
+ * png_get_<chunk> functions return a non-zero value if the data was available
+ * in info_ptr, or return zero and do not change any of the parameters if the
+ * data was not available.
+ *
+ * These functions should be used instead of directly accessing png_info
+ * to avoid problems with future changes in the size and internal layout of
+ * png_info_struct.
+ */
+/* Returns "flag" if chunk data is valid in info_ptr. */
+extern PNG_EXPORT(png_uint_32,png_get_valid) PNGARG((png_structp png_ptr,
+png_infop info_ptr, png_uint_32 flag));
+
+/* Returns number of bytes needed to hold a transformed row. */
+extern PNG_EXPORT(png_size_t,png_get_rowbytes) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+/* Returns row_pointers, which is an array of pointers to scanlines that was
+ * returned from png_read_png().
+ */
+extern PNG_EXPORT(png_bytepp,png_get_rows) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+/* Set row_pointers, which is an array of pointers to scanlines for use
+ * by png_write_png().
+ */
+extern PNG_EXPORT(void,png_set_rows) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytepp row_pointers));
+#endif
+
+/* Returns number of color channels in image. */
+extern PNG_EXPORT(png_byte,png_get_channels) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* Returns image width in pixels. */
+extern PNG_EXPORT(png_uint_32, png_get_image_width) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image height in pixels. */
+extern PNG_EXPORT(png_uint_32, png_get_image_height) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image bit_depth. */
+extern PNG_EXPORT(png_byte, png_get_bit_depth) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image color_type. */
+extern PNG_EXPORT(png_byte, png_get_color_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image filter_type. */
+extern PNG_EXPORT(png_byte, png_get_filter_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image interlace_type. */
+extern PNG_EXPORT(png_byte, png_get_interlace_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image compression_type. */
+extern PNG_EXPORT(png_byte, png_get_compression_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image resolution in pixels per meter, from pHYs chunk data. */
+extern PNG_EXPORT(png_uint_32, png_get_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32, png_get_x_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32, png_get_y_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns pixel aspect ratio, computed from pHYs chunk data.  */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(float, png_get_pixel_aspect_ratio) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+#endif
+
+/* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
+extern PNG_EXPORT(png_int_32, png_get_x_offset_pixels) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_y_offset_pixels) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_x_offset_microns) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_y_offset_microns) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+#endif /* PNG_EASY_ACCESS_SUPPORTED */
+
+/* Returns pointer to signature string read from PNG header */
+extern PNG_EXPORT(png_bytep,png_get_signature) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#ifdef PNG_bKGD_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_bKGD) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_16p *background));
+#endif
+
+#ifdef PNG_bKGD_SUPPORTED
+extern PNG_EXPORT(void,png_set_bKGD) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_16p background));
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double *white_x, double *white_y, double *red_x,
+   double *red_y, double *green_x, double *green_y, double *blue_x,
+   double *blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_white_x, png_fixed_point
+   *int_white_y, png_fixed_point *int_red_x, png_fixed_point *int_red_y,
+   png_fixed_point *int_green_x, png_fixed_point *int_green_y, png_fixed_point
+   *int_blue_x, png_fixed_point *int_blue_y));
+#endif
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double white_x, double white_y, double red_x,
+   double red_y, double green_x, double green_y, double blue_x, double blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
+
+#ifdef PNG_gAMA_SUPPORTED
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_gAMA) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double *file_gamma));
+#endif
+extern PNG_EXPORT(png_uint_32,png_get_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_file_gamma));
+#endif
+
+#ifdef PNG_gAMA_SUPPORTED
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gAMA) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double file_gamma));
+#endif
+extern PNG_EXPORT(void,png_set_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_file_gamma));
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_16p *hist));
+#endif
+
+#ifdef PNG_hIST_SUPPORTED
+extern PNG_EXPORT(void,png_set_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_16p hist));
+#endif
+
+extern PNG_EXPORT(png_uint_32,png_get_IHDR) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *width, png_uint_32 *height,
+   int *bit_depth, int *color_type, int *interlace_method,
+   int *compression_method, int *filter_method));
+
+extern PNG_EXPORT(void,png_set_IHDR) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth,
+   int color_type, int interlace_method, int compression_method,
+   int filter_method));
+
+#ifdef PNG_oFFs_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_oFFs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_int_32 *offset_x, png_int_32 *offset_y,
+   int *unit_type));
+#endif
+
+#ifdef PNG_oFFs_SUPPORTED
+extern PNG_EXPORT(void,png_set_oFFs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_int_32 offset_x, png_int_32 offset_y,
+   int unit_type));
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_pCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp *purpose, png_int_32 *X0, png_int_32 *X1,
+   int *type, int *nparams, png_charp *units, png_charpp *params));
+#endif
+
+#ifdef PNG_pCAL_SUPPORTED
+extern PNG_EXPORT(void,png_set_pCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp purpose, png_int_32 X0, png_int_32 X1,
+   int type, int nparams, png_charp units, png_charpp params));
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_pHYs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
+#endif
+
+#ifdef PNG_pHYs_SUPPORTED
+extern PNG_EXPORT(void,png_set_pHYs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 res_x, png_uint_32 res_y, int unit_type));
+#endif
+
+extern PNG_EXPORT(png_uint_32,png_get_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_colorp *palette, int *num_palette));
+
+extern PNG_EXPORT(void,png_set_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_colorp palette, int num_palette));
+
+#ifdef PNG_sBIT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sBIT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_8p *sig_bit));
+#endif
+
+#ifdef PNG_sBIT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sBIT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_8p sig_bit));
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sRGB) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *intent));
+#endif
+
+#ifdef PNG_sRGB_SUPPORTED
+extern PNG_EXPORT(void,png_set_sRGB) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int intent));
+extern PNG_EXPORT(void,png_set_sRGB_gAMA_and_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int intent));
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charpp name, int *compression_type,
+   png_charpp profile, png_uint_32 *proflen));
+   /* Note to maintainer: profile should be png_bytepp */
+#endif
+
+#ifdef PNG_iCCP_SUPPORTED
+extern PNG_EXPORT(void,png_set_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp name, int compression_type,
+   png_charp profile, png_uint_32 proflen));
+   /* Note to maintainer: profile should be png_bytep */
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sPLT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_sPLT_tpp entries));
+#endif
+
+#ifdef PNG_sPLT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sPLT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_sPLT_tp entries, int nentries));
+#endif
+
+#ifdef PNG_TEXT_SUPPORTED
+/* png_get_text also returns the number of text chunks in *num_text */
+extern PNG_EXPORT(png_uint_32,png_get_text) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp *text_ptr, int *num_text));
+#endif
+
+/* Note while png_set_text() will accept a structure whose text,
+ * language, and  translated keywords are NULL pointers, the structure
+ * returned by png_get_text will always contain regular
+ * zero-terminated C strings.  They might be empty strings but
+ * they will never be NULL pointers.
+ */
+
+#ifdef PNG_TEXT_SUPPORTED
+extern PNG_EXPORT(void,png_set_text) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp text_ptr, int num_text));
+#endif
+
+#ifdef PNG_tIME_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_tIME) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_timep *mod_time));
+#endif
+
+#ifdef PNG_tIME_SUPPORTED
+extern PNG_EXPORT(void,png_set_tIME) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_timep mod_time));
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_tRNS) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep *trans_alpha, int *num_trans,
+   png_color_16p *trans_color));
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+extern PNG_EXPORT(void,png_set_tRNS) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep trans_alpha, int num_trans,
+   png_color_16p trans_color));
+#endif
+
+#ifdef PNG_tRNS_SUPPORTED
+#endif
+
+#ifdef PNG_sCAL_SUPPORTED
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, double *width, double *height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, png_charpp swidth, png_charpp sheight));
+#endif
+#endif
+#endif /* PNG_sCAL_SUPPORTED */
+
+#ifdef PNG_sCAL_SUPPORTED
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, double width, double height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, png_charp swidth, png_charp sheight));
+#endif
+#endif
+#endif /* PNG_sCAL_SUPPORTED */
+
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+/* Provide a list of chunks and how they are to be handled, if the built-in
+   handling or default unknown chunk handling is not desired.  Any chunks not
+   listed will be handled in the default manner.  The IHDR and IEND chunks
+   must not be listed.
+      keep = 0 (PNG_HANDLE_CHUNK_AS_DEFAULT): follow default behaviour
+           = 1 (PNG_HANDLE_CHUNK_NEVER):      do not keep
+           = 2 (PNG_HANDLE_CHUNK_IF_SAFE):    keep only if safe-to-copy
+           = 3 (PNG_HANDLE_CHUNK_ALWAYS):     keep even if unsafe-to-copy
+*/
+extern PNG_EXPORT(void, png_set_keep_unknown_chunks) PNGARG((png_structp
+   png_ptr, int keep, png_bytep chunk_list, int num_chunks));
+extern PNG_EXPORT(int,png_handle_as_unknown) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name));
+#endif
+#ifdef PNG_UNKNOWN_CHUNKS_SUPPORTED
+extern PNG_EXPORT(void, png_set_unknown_chunks) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns));
+extern PNG_EXPORT(void, png_set_unknown_chunk_location)
+   PNGARG((png_structp png_ptr, png_infop info_ptr, int chunk, int location));
+extern PNG_EXPORT(png_uint_32,png_get_unknown_chunks) PNGARG((png_structp
+   png_ptr, png_infop info_ptr, png_unknown_chunkpp entries));
+#endif
+
+/* png_free_data() will turn off the "valid" flag for anything it frees.
+ * If you need to turn it off for a chunk that your application has freed,
+ * you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK);
+ */
+extern PNG_EXPORT(void, png_set_invalid) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int mask));
+
+#ifdef PNG_INFO_IMAGE_SUPPORTED
+/* The "params" pointer is currently not used and is for future expansion. */
+extern PNG_EXPORT(void, png_read_png) PNGARG((png_structp png_ptr,
+                        png_infop info_ptr,
+                        int transforms,
+                        png_voidp params));
+extern PNG_EXPORT(void, png_write_png) PNGARG((png_structp png_ptr,
+                        png_infop info_ptr,
+                        int transforms,
+                        png_voidp params));
+#endif
+
+extern PNG_EXPORT(png_charp,png_get_copyright) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_header_ver) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_header_version) PNGARG((png_structp
+    png_ptr));
+extern PNG_EXPORT(png_charp,png_get_libpng_ver) PNGARG((png_structp png_ptr));
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_permit_mng_features) PNGARG((png_structp
+   png_ptr, png_uint_32 mng_features_permitted));
+#endif
+
+/* For use in png_set_keep_unknown_chunks(), added in version 1.2.6 */
+#define PNG_HANDLE_CHUNK_AS_DEFAULT   0
+#define PNG_HANDLE_CHUNK_NEVER        1
+#define PNG_HANDLE_CHUNK_IF_SAFE      2
+#define PNG_HANDLE_CHUNK_ALWAYS       3
+
+/* Strip the prepended error numbers ("#nnn ") from error and warning
+ * messages before passing them to the error or warning handler.
+ */
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+extern PNG_EXPORT(void,png_set_strip_error_numbers) PNGARG((png_structp
+   png_ptr, png_uint_32 strip_mode));
+#endif
+
+/* Added in libpng-1.2.6: set/get user_width_max and user_height_max */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+extern PNG_EXPORT(void,png_set_user_limits) PNGARG((png_structp
+   png_ptr, png_uint_32 user_width_max, png_uint_32 user_height_max));
+extern PNG_EXPORT(png_uint_32,png_get_user_width_max) PNGARG((png_structp
+   png_ptr));
+extern PNG_EXPORT(png_uint_32,png_get_user_height_max) PNGARG((png_structp
+   png_ptr));
+/* Added in libpng-1.4.0: set/get user_chunk_cache_max */
+extern PNG_EXPORT(void,png_set_chunk_cache_max) PNGARG((png_structp
+   png_ptr, png_uint_32 user_chunk_cache_max));
+extern PNG_EXPORT(png_uint_32,png_get_chunk_cache_max)
+   PNGARG((png_structp png_ptr));
+/* Added in libpng-1.4.1: set/get user_chunk_malloc_max */
+extern PNG_EXPORT(void,png_set_chunk_malloc_max) PNGARG((png_structp
+   png_ptr, png_alloc_size_t user_chunk_malloc_max));
+extern PNG_EXPORT(png_alloc_size_t,png_get_chunk_malloc_max)
+   PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_INCH_CONVERSIONS) && defined(PNG_FLOATING_POINT_SUPPORTED)
+/* Image resolution in pixels per inch (presumably from pHYs chunk data). */
+extern PNG_EXPORT(png_uint_32,png_get_pixels_per_inch) PNGARG((png_structp
+   png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32,png_get_x_pixels_per_inch) PNGARG((png_structp
+   png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32,png_get_y_pixels_per_inch) PNGARG((png_structp
+   png_ptr, png_infop info_ptr));
+
+/* Image x, y offset in inches (presumably from oFFs chunk data). */
+extern PNG_EXPORT(float,png_get_x_offset_inches) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+extern PNG_EXPORT(float,png_get_y_offset_inches) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifdef PNG_pHYs_SUPPORTED
+/* Like png_get_pHYs(), but presumably reports the resolution in dpi. */
+extern PNG_EXPORT(png_uint_32,png_get_pHYs_dpi) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
+#endif /* PNG_pHYs_SUPPORTED */
+#endif  /* PNG_INCH_CONVERSIONS && PNG_FLOATING_POINT_SUPPORTED */
+
+/* Added in libpng-1.4.0 */
+#ifdef PNG_IO_STATE_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_io_state) PNGARG((png_structp png_ptr));
+
+extern PNG_EXPORT(png_bytep,png_get_io_chunk_name)
+   PNGARG((png_structp png_ptr));
+
+/* The flags returned by png_get_io_state() are the following: */
+#define PNG_IO_NONE        0x0000   /* no I/O at this moment */
+#define PNG_IO_READING     0x0001   /* currently reading */
+#define PNG_IO_WRITING     0x0002   /* currently writing */
+#define PNG_IO_SIGNATURE   0x0010   /* currently at the file signature */
+#define PNG_IO_CHUNK_HDR   0x0020   /* currently at the chunk header */
+#define PNG_IO_CHUNK_DATA  0x0040   /* currently at the chunk data */
+#define PNG_IO_CHUNK_CRC   0x0080   /* currently at the chunk crc */
+#define PNG_IO_MASK_OP     0x000f   /* current operation: reading/writing */
+#define PNG_IO_MASK_LOC    0x00f0   /* current location: sig/hdr/data/crc */
+#endif /* ?PNG_IO_STATE_SUPPORTED */
+
+/* Maintainer: Put new public prototypes here ^, in libpng.3, and project
+ * defs
+ */
+
+#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
+/* With these routines we avoid an integer divide, which will be slower on
+ * most machines.  However, it does take more operations than the corresponding
+ * divide method, so it may be slower on a few RISC systems.  There are two
+ * shifts (by 8 or 16 bits) and an addition, versus a single integer divide.
+ *
+ * Note that the rounding factors are NOT supposed to be the same!  128 and
+ * 32768 are correct for the NODIV code; 127 and 32767 are correct for the
+ * standard method.
+ *
+ * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ]
+ */
+ /* fg and bg should be in `gamma 1.0' space; alpha is the opacity of fg.   */
+ /* The blended value is assigned to `composite'.  The NODIV forms expand   */
+ /* to a brace-enclosed block: invoke them only as complete statements.     */
+#  define png_composite(composite, fg, alpha, bg)         \
+     { png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) \
+           * (png_uint_16)(alpha)                         \
+           + (png_uint_16)(bg)*(png_uint_16)(255          \
+           - (png_uint_16)(alpha)) + (png_uint_16)128);   \
+       (composite) = (png_byte)((temp + (temp >> 8)) >> 8); }
+
+#  define png_composite_16(composite, fg, alpha, bg)       \
+     { png_uint_32 temp = (png_uint_32)((png_uint_32)(fg)  \
+           * (png_uint_32)(alpha)                          \
+           + (png_uint_32)(bg)*(png_uint_32)(65535L        \
+           - (png_uint_32)(alpha)) + (png_uint_32)32768L); \
+       (composite) = (png_uint_16)((temp + (temp >> 16)) >> 16); }
+
+#else  /* Standard method using integer division */
+/* These forms expand to a single assignment expression, not a block. */
+#  define png_composite(composite, fg, alpha, bg)                            \
+     (composite) = (png_byte)(((png_uint_16)(fg) * (png_uint_16)(alpha) +    \
+       (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) +       \
+       (png_uint_16)127) / 255)
+
+#  define png_composite_16(composite, fg, alpha, bg)                         \
+     (composite) = (png_uint_16)(((png_uint_32)(fg) * (png_uint_32)(alpha) + \
+       (png_uint_32)(bg)*(png_uint_32)(65535L - (png_uint_32)(alpha)) +      \
+       (png_uint_32)32767) / (png_uint_32)65535L)
+#endif /* PNG_READ_COMPOSITE_NODIV_SUPPORTED */
+
+#ifdef PNG_USE_READ_MACROS
+/* Inline macros to do direct reads of bytes from the input buffer; `buf' is
+ * evaluated more than once.  png_get_int_32() decodes the two's complement
+ * value with unsigned arithmetic so that negative inputs never overflow.
+ */
+/* We could make special-case BIG_ENDIAN macros that do direct reads here */
+#  define png_get_uint_32(buf) \
+     (((png_uint_32)(*(buf)) << 24) + \
+      ((png_uint_32)(*((buf) + 1)) << 16) + \
+      ((png_uint_32)(*((buf) + 2)) << 8) + \
+      ((png_uint_32)(*((buf) + 3))))
+#  define png_get_uint_16(buf) \
+     (((png_uint_32)(*(buf)) << 8) + \
+      ((png_uint_32)(*((buf) + 1))))
+#ifdef PNG_GET_INT_32_SUPPORTED
+#  define png_get_int_32(buf) \
+     ((png_int_32)((*(buf) & 0x80) \
+      ? -(png_int_32)(png_get_uint_32(buf) ^ 0xffffffffL) \
+        - 1 \
+      : (png_int_32)png_get_uint_32(buf)))
+#endif
+#else
+extern PNG_EXPORT(png_uint_32,png_get_uint_32) PNGARG((png_bytep buf));
+extern PNG_EXPORT(png_uint_16,png_get_uint_16) PNGARG((png_bytep buf));
+#ifdef PNG_GET_INT_32_SUPPORTED
+extern PNG_EXPORT(png_int_32,png_get_int_32) PNGARG((png_bytep buf));
+#endif
+#endif
+extern PNG_EXPORT(png_uint_32,png_get_uint_31)
+  PNGARG((png_structp png_ptr, png_bytep buf));
+/* No png_get_int_16 -- may be added if there's a real need for it. */
+
+/* Place a 32-bit number into a buffer in PNG byte order (big-endian). */
+extern PNG_EXPORT(void,png_save_uint_32)
+   PNGARG((png_bytep buf, png_uint_32 i));
+extern PNG_EXPORT(void,png_save_int_32)
+   PNGARG((png_bytep buf, png_int_32 i));
+
+/* Place a 16-bit number into a buffer in PNG byte order.
+ * The parameter is declared unsigned int, not png_uint_16,
+ * just to avoid potential problems on pre-ANSI C compilers.
+ */
+extern PNG_EXPORT(void,png_save_uint_16)
+   PNGARG((png_bytep buf, unsigned int i));
+/* No png_save_int_16 -- may be added if there's a real need for it. */
+
+/* ************************************************************************* */
+
+/* Various modes of operation.  Note that after an init, mode is set to
+ * zero automatically when the structure is created.
+ */
+#define PNG_HAVE_IHDR               0x01
+#define PNG_HAVE_PLTE               0x02
+#define PNG_HAVE_IDAT               0x04
+#define PNG_AFTER_IDAT              0x08 /* Have complete zlib datastream */
+#define PNG_HAVE_IEND               0x10
+#define PNG_HAVE_gAMA               0x20
+#define PNG_HAVE_cHRM               0x40
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PNG_VERSION_INFO_ONLY */
+/* Do not put anything past this line */
+#endif /* PNG_H */
diff --git a/GLideNHQ/inc/pngconf.h b/GLideNHQ/inc/pngconf.h
new file mode 100644
index 00000000..4aacf9c7
--- /dev/null
+++ b/GLideNHQ/inc/pngconf.h
@@ -0,0 +1,1510 @@
+
+/* pngconf.h - machine configurable file for libpng
+ *
+ * libpng version 1.4.1 - February 25, 2010
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2010 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ *
+ */
+
+/* Any machine specific code is near the front of this file, so if you
+ * are configuring libpng for a machine, you may want to read the section
+ * starting here down to where it starts to typedef png_color, png_text,
+ * and png_info.
+ */
+
+#ifndef PNGCONF_H
+#define PNGCONF_H
+
+#ifndef PNG_NO_LIMITS_H
+#  include <limits.h>
+#endif
+
+/* Added at libpng-1.2.9 */
+
+/* config.h is created by and PNG_CONFIGURE_LIBPNG is set by the "configure"
+ * script.
+ */
+#ifdef PNG_CONFIGURE_LIBPNG
+#  ifdef HAVE_CONFIG_H
+#    include "config.h"
+#  endif
+#endif
+
+/*
+ * Added at libpng-1.2.8
+ *
+ * PNG_USER_CONFIG has to be defined on the compiler command line. This
+ * includes the resource compiler for Windows DLL configurations.
+ */
+#ifdef PNG_USER_CONFIG
+#  ifndef PNG_USER_PRIVATEBUILD
+#    define PNG_USER_PRIVATEBUILD
+#  endif
+#  include "pngusr.h"
+#endif
+
+/*
+ * If you create a private DLL you need to define the following in "pngusr.h":
+ * #define PNG_USER_PRIVATEBUILD <Describes by whom and why this version of
+ *        the DLL was built>
+ *  e.g. #define PNG_USER_PRIVATEBUILD "Build by MyCompany for xyz reasons."
+ * #define PNG_USER_DLLFNAME_POSTFIX <two-letter postfix that serve to
+ *        distinguish your DLL from those of the official release. These
+ *        correspond to the trailing letters that come after the version
+ *        number and must match your private DLL name>
+ *  e.g. // private DLL "libpng13gx.dll"
+ *       #define PNG_USER_DLLFNAME_POSTFIX "gx"
+ *
+ * The following macros are also at your disposal if you want to complete the
+ * DLL VERSIONINFO structure.
+ * - PNG_USER_VERSIONINFO_COMMENTS
+ * - PNG_USER_VERSIONINFO_COMPANYNAME
+ * - PNG_USER_VERSIONINFO_LEGALTRADEMARKS
+ */
+
+#ifdef __STDC__
+#  ifdef SPECIALBUILD
+#    pragma message("PNG_LIBPNG_SPECIALBUILD (and deprecated SPECIALBUILD)\
+     are now LIBPNG reserved macros. Use PNG_USER_PRIVATEBUILD instead.")
+#  endif
+
+#  ifdef PRIVATEBUILD
+#    pragma message("PRIVATEBUILD is deprecated.\
+     Use PNG_USER_PRIVATEBUILD instead.")
+#    define PNG_USER_PRIVATEBUILD PRIVATEBUILD
+#  endif
+#endif /* __STDC__ */
+
+/* End of material added to libpng-1.2.8 */
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* This is the size of the compression buffer, and thus the size of
+ * an IDAT chunk.  Make this whatever size you feel is best for your
+ * machine.  One of these will be allocated per png_struct.  When this
+ * is full, it writes the data to the disk, and does some other
+ * calculations.  Making this an extremely small size will slow
+ * the library down, but you may want to experiment to determine
+ * where it becomes significant, if you are concerned with memory
+ * usage.  Note that zlib allocates at least 32Kb also.  For readers,
+ * this describes the size of the buffer available to read the data in.
+ * Unless this gets smaller than the size of a row (compressed),
+ * it should not make much difference how big this is.
+ */
+
+#ifndef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 8192
+#endif
+
+/* Enable if you want a write-only libpng */
+
+#ifndef PNG_NO_READ_SUPPORTED
+#  define PNG_READ_SUPPORTED
+#endif
+
+/* Enable if you want a read-only libpng */
+
+#ifndef PNG_NO_WRITE_SUPPORTED
+#  define PNG_WRITE_SUPPORTED
+#endif
+
+/* Enabled in 1.4.0. */
+#ifdef PNG_ALLOW_BENIGN_ERRORS
+#  define png_benign_error png_warning
+#  define png_chunk_benign_error png_chunk_warning
+#else
+#  ifndef PNG_BENIGN_ERRORS_SUPPORTED
+#    define png_benign_error png_error
+#    define png_chunk_benign_error png_chunk_error
+#  endif
+#endif
+
+/* Added at libpng version 1.4.0 */
+#if !defined(PNG_NO_WARNINGS) && !defined(PNG_WARNINGS_SUPPORTED)
+#  define PNG_WARNINGS_SUPPORTED
+#endif
+
+/* Added at libpng version 1.4.0 */
+#if !defined(PNG_NO_ERROR_TEXT) && !defined(PNG_ERROR_TEXT_SUPPORTED)
+#  define PNG_ERROR_TEXT_SUPPORTED
+#endif
+
+/* Added at libpng version 1.4.0 */
+#if !defined(PNG_NO_CHECK_cHRM) && !defined(PNG_CHECK_cHRM_SUPPORTED)
+#  define PNG_CHECK_cHRM_SUPPORTED
+#endif
+
+/* Added at libpng version 1.4.0 */
+#if !defined(PNG_NO_ALIGNED_MEMORY) && !defined(PNG_ALIGNED_MEMORY_SUPPORTED)
+#  define PNG_ALIGNED_MEMORY_SUPPORTED
+#endif
+
+/* Enabled by default in 1.2.0.  You can disable this if you don't need to
+   support PNGs that are embedded in MNG datastreams */
+#ifndef PNG_NO_MNG_FEATURES
+#  ifndef PNG_MNG_FEATURES_SUPPORTED
+#    define PNG_MNG_FEATURES_SUPPORTED
+#  endif
+#endif
+
+/* Added at libpng version 1.4.0 */
+#ifndef PNG_NO_FLOATING_POINT_SUPPORTED
+#  ifndef PNG_FLOATING_POINT_SUPPORTED
+#    define PNG_FLOATING_POINT_SUPPORTED
+#  endif
+#endif
+
+/* Added at libpng-1.4.0beta49 for testing (this test is no longer used
+   in libpng and png_calloc() is always present)
+ */
+#define PNG_CALLOC_SUPPORTED
+
+/* If you are running on a machine where you cannot allocate more
+ * than 64K of memory at once, uncomment this.  While libpng will not
+ * normally need that much memory in a chunk (unless you load up a very
+ * large file), zlib needs to know how big of a chunk it can use, and
+ * libpng thus makes sure to check any memory allocation to verify it
+ * will fit into memory.
+#define PNG_MAX_MALLOC_64K
+ */
+#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
+#  define PNG_MAX_MALLOC_64K
+#endif
+
+/* Special munging to support doing things the 'cygwin' way:
+ * 'Normal' png-on-win32 defines/defaults:
+ *   PNG_BUILD_DLL -- building dll
+ *   PNG_USE_DLL   -- building an application, linking to dll
+ *   (no define)   -- building static library, or building an
+ *                    application and linking to the static lib
+ * 'Cygwin' defines/defaults:
+ *   PNG_BUILD_DLL -- (ignored) building the dll
+ *   (no define)   -- (ignored) building an application, linking to the dll
+ *   PNG_STATIC    -- (ignored) building the static lib, or building an
+ *                    application that links to the static lib.
+ *   ALL_STATIC    -- (ignored) building various static libs, or building an
+ *                    application that links to the static libs.
+ * Thus,
+ * a cygwin user should define either PNG_BUILD_DLL or PNG_STATIC, and
+ * this bit of #ifdefs will define the 'correct' config variables based on
+ * that. If a cygwin user *wants* to define 'PNG_USE_DLL' that's okay, but
+ * unnecessary.
+ *
+ * Also, the precedence order is:
+ *   ALL_STATIC (since we can't #undef something outside our namespace)
+ *   PNG_BUILD_DLL
+ *   PNG_STATIC
+ *   (nothing) == PNG_USE_DLL
+ *
+ * CYGWIN (2002-01-20): The preceding is now obsolete. With the advent
+ *   of auto-import in binutils, we no longer need to worry about
+ *   __declspec(dllexport) / __declspec(dllimport) and friends.  Therefore,
+ *   we don't need to worry about PNG_STATIC or ALL_STATIC when it comes
+ *   to __declspec() stuff.  However, we DO need to worry about
+ *   PNG_BUILD_DLL and PNG_STATIC because those change some defaults
+ *   such as CONSOLE_IO.
+ */
+#ifdef __CYGWIN__ /* normalize PNG_STATIC / PNG_DLL / PNG_USE_DLL on Cygwin */
+#  ifdef ALL_STATIC /* highest precedence: clear DLL flags, force PNG_STATIC */
+#    ifdef PNG_BUILD_DLL
+#      undef PNG_BUILD_DLL
+#    endif
+#    ifdef PNG_USE_DLL
+#      undef PNG_USE_DLL
+#    endif
+#    ifdef PNG_DLL
+#      undef PNG_DLL
+#    endif
+#    ifndef PNG_STATIC
+#      define PNG_STATIC
+#    endif
+#  else /* !ALL_STATIC */
+#    ifdef PNG_BUILD_DLL /* building the DLL: drop PNG_STATIC and PNG_USE_DLL */
+#      ifdef PNG_STATIC
+#        undef PNG_STATIC
+#      endif
+#      ifdef PNG_USE_DLL
+#        undef PNG_USE_DLL
+#      endif
+#      ifndef PNG_DLL
+#        define PNG_DLL
+#      endif
+#    else /* !PNG_BUILD_DLL */
+#      ifdef PNG_STATIC /* static requested: clear both DLL flags */
+#        ifdef PNG_USE_DLL
+#          undef PNG_USE_DLL
+#        endif
+#        ifdef PNG_DLL
+#          undef PNG_DLL
+#        endif
+#      else /* !PNG_STATIC: nothing requested, default to using the DLL */
+#        ifndef PNG_USE_DLL
+#          define PNG_USE_DLL
+#        endif
+#        ifndef PNG_DLL
+#          define PNG_DLL
+#        endif
+#      endif /* PNG_STATIC */
+#    endif /* PNG_BUILD_DLL */
+#  endif /* ALL_STATIC */
+#endif /* __CYGWIN__ */
+
+/* This protects us against compilers that run on a windowing system
+ * and thus don't have or would rather us not use the stdio types:
+ * stdin, stdout, and stderr.  The only one currently used is stderr
+ * in png_error() and png_warning().  #defining PNG_NO_CONSOLE_IO will
+ * prevent these from being compiled and used. #defining PNG_NO_STDIO
+ * will also prevent these, plus will prevent the entire set of stdio
+ * macros and functions (FILE *, printf, etc.) from being compiled and used,
+ * unless (PNG_DEBUG > 0) has been #defined.
+ *
+ * #define PNG_NO_CONSOLE_IO
+ * #define PNG_NO_STDIO
+ */
+
+#if !defined(PNG_NO_STDIO) && !defined(PNG_STDIO_SUPPORTED)
+#  define PNG_STDIO_SUPPORTED
+#endif
+
+
+#ifdef PNG_BUILD_DLL
+#  if !defined(PNG_CONSOLE_IO_SUPPORTED) && !defined(PNG_NO_CONSOLE_IO)
+#    define PNG_NO_CONSOLE_IO
+#  endif
+#endif
+
+#  ifdef PNG_NO_STDIO /* stdio disabled: console I/O is disabled as well */
+#    ifndef PNG_NO_CONSOLE_IO
+#      define PNG_NO_CONSOLE_IO
+#    endif
+#    ifdef PNG_DEBUG
+#      if (PNG_DEBUG > 0) /* debug builds still include <stdio.h> */
+#        include <stdio.h>
+#      endif
+#    endif /* PNG_DEBUG */
+#  else /* !PNG_NO_STDIO */
+#    include <stdio.h>
+#  endif /* PNG_NO_STDIO */
+
+#if !(defined PNG_NO_CONSOLE_IO) && !defined(PNG_CONSOLE_IO_SUPPORTED)
+#  define PNG_CONSOLE_IO_SUPPORTED
+#endif
+
+/* This macro protects us against machines that don't have function
+ * prototypes (ie K&R style headers).  If your compiler does not handle
+ * function prototypes, define this macro and use the included ansi2knr.
+ * I've always been able to use _NO_PROTO as the indicator, but you may
+ * need to drag the empty declaration out in front of here, or change the
+ * ifdef to suit your own needs.
+ */
+#ifndef PNGARG
+
+#ifdef OF /* zlib prototype munger */
+#  define PNGARG(arglist) OF(arglist)
+#else
+
+#ifdef _NO_PROTO
+#  define PNGARG(arglist) ()
+#else
+#  define PNGARG(arglist) arglist
+#endif /* _NO_PROTO */
+
+#endif /* OF */
+
+#endif /* PNGARG */
+
+/* Try to determine if we are compiling on a Mac.  Note that testing for
+ * just __MWERKS__ is not good enough, because the Codewarrior is now used
+ * on non-Mac platforms.
+ */
+#ifndef MACOS
+#  if (defined(__MWERKS__) && defined(macintosh)) || defined(applec) || \
+      defined(THINK_C) || defined(__SC__) || defined(TARGET_OS_MAC)
+#    define MACOS
+#  endif
+#endif
+
+/* Enough people need this for various reasons to include it here */
+#if !defined(MACOS) && !defined(RISCOS)
+#  include <sys/types.h>
+#endif
+
+/* PNG_SETJMP_NOT_SUPPORTED and PNG_NO_SETJMP_SUPPORTED are deprecated. */
+#if !defined(PNG_NO_SETJMP) && \
+    !defined(PNG_SETJMP_NOT_SUPPORTED) && !defined(PNG_NO_SETJMP_SUPPORTED)
+#  define PNG_SETJMP_SUPPORTED
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* This is an attempt to force a single setjmp behaviour on Linux.  If
+ * the X config stuff didn't define _BSD_SOURCE we wouldn't need this.
+ *
+ * You can bypass this test if you know that your application uses exactly
+ * the same setjmp.h that was included when libpng was built.  Only define
+ * PNG_SKIP_SETJMP_CHECK while building your application, prior to the
+ * application's '#include "png.h"'. Don't define PNG_SKIP_SETJMP_CHECK
+ * while building a separate libpng library for general use.
+ */
+
+#  ifndef PNG_SKIP_SETJMP_CHECK
+#    ifdef __linux__
+#      ifdef _BSD_SOURCE
+#        define PNG_SAVE_BSD_SOURCE
+#        undef _BSD_SOURCE
+#      endif
+#      ifdef _SETJMP_H
+       /* If you encounter a compiler error here, see the explanation
+        * near the end of INSTALL.
+        */
+           __pngconf.h__ in libpng already includes setjmp.h;
+           __dont__ include it again.;
+#      endif
+#    endif /* __linux__ */
+#  endif /* PNG_SKIP_SETJMP_CHECK */
+
+   /* Include setjmp.h for error handling */
+#  include <setjmp.h>
+
+#  ifdef __linux__
+#    ifdef PNG_SAVE_BSD_SOURCE
+#      ifdef _BSD_SOURCE
+#        undef _BSD_SOURCE
+#      endif
+#      define _BSD_SOURCE
+#      undef PNG_SAVE_BSD_SOURCE
+#    endif
+#  endif /* __linux__ */
+#endif /* PNG_SETJMP_SUPPORTED */
+
+#ifdef BSD
+#  include <strings.h>
+#else
+#  include <string.h>
+#endif
+
+/* Other defines for things like memory and the like can go here.  */
+
+/* This controls how fine the dithering gets.  As this allocates
+ * a largish chunk of memory (32K), those who are not as concerned
+ * with dithering quality can decrease some or all of these.
+ */
+#ifndef PNG_DITHER_RED_BITS
+#  define PNG_DITHER_RED_BITS 5
+#endif
+#ifndef PNG_DITHER_GREEN_BITS
+#  define PNG_DITHER_GREEN_BITS 5
+#endif
+#ifndef PNG_DITHER_BLUE_BITS
+#  define PNG_DITHER_BLUE_BITS 5
+#endif
+
+/* This controls how fine the gamma correction becomes when you
+ * are only interested in 8 bits anyway.  Increasing this value
+ * results in more memory being used, and more pow() functions
+ * being called to fill in the gamma tables.  Don't set this value
+ * less than 8, and even that may not work (I haven't tested it).
+ */
+
+#ifndef PNG_MAX_GAMMA_8
+#  define PNG_MAX_GAMMA_8 11
+#endif
+
+/* This controls how much a difference in gamma we can tolerate before
+ * we actually start doing gamma conversion.
+ */
+#ifndef PNG_GAMMA_THRESHOLD
+#  define PNG_GAMMA_THRESHOLD 0.05
+#endif
+
+/* The following uses const char * instead of char * for error
+ * and warning message functions, so some compilers won't complain.
+ * If you do not want to use const, define PNG_NO_CONST here.
+ */
+
+#ifndef PNG_CONST
+#  ifndef PNG_NO_CONST
+#    define PNG_CONST const
+#  else
+#    define PNG_CONST
+#  endif
+#endif
+
+/* The following defines give you the ability to remove code from the
+ * library that you will not be using.  I wish I could figure out how to
+ * automate this, but I can't do that without making it seriously hard
+ * on the users.  So if you are not using an ability, change the #define
+ * to an #undef, and that part of the library will not be compiled.  If
+ * your linker can't find a function, you may want to make sure the
+ * ability is defined here.  Some of these depend upon some others being
+ * defined.  I haven't figured out all the interactions here, so you may
+ * have to experiment awhile to get everything to compile.  If you are
+ * creating or using a shared library, you probably shouldn't touch this,
+ * as it will affect the size of the structures, and this will cause bad
+ * things to happen if the library and/or application ever change.
+ */
+
+/* Any features you will not be using can be undef'ed here */
+
+/* GR-P, 0.96a: Set *TRANSFORMS_SUPPORTED as default but allow user
+ * to turn it off with PNG_NO_READ|WRITE_TRANSFORMS on the compile line,
+ * then pick and choose which ones to define without having to edit this
+ * file. It is safe to use the PNG_NO_READ|WRITE_TRANSFORMS
+ * if you only want to have a png-compliant reader/writer but don't need
+ * any of the extra transformations.  This saves about 80 kbytes in a
+ * typical installation of the library. (PNG_NO_* form added in version
+ * 1.0.1c, for consistency; PNG_*_TRANSFORMS_NOT_SUPPORTED deprecated in
+ * 1.4.0)
+ */
+
+/* Ignore attempt to turn off both floating and fixed point support */
+#if !defined(PNG_FLOATING_POINT_SUPPORTED) || \
+    !defined(PNG_NO_FIXED_POINT_SUPPORTED)
+#  define PNG_FIXED_POINT_SUPPORTED
+#endif
+
+#ifdef PNG_READ_SUPPORTED
+
+/* PNG_READ_TRANSFORMS_NOT_SUPPORTED is deprecated. */
+#if !defined(PNG_READ_TRANSFORMS_NOT_SUPPORTED) && \
+      !defined(PNG_NO_READ_TRANSFORMS)
+#  define PNG_READ_TRANSFORMS_SUPPORTED
+#endif
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+#  ifndef PNG_NO_READ_EXPAND
+#    define PNG_READ_EXPAND_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SHIFT
+#    define PNG_READ_SHIFT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_PACK
+#    define PNG_READ_PACK_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_BGR
+#    define PNG_READ_BGR_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SWAP
+#    define PNG_READ_SWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_PACKSWAP
+#    define PNG_READ_PACKSWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_INVERT
+#    define PNG_READ_INVERT_SUPPORTED
+#  endif
+#if 0 /* removed from libpng-1.4.0 */
+#  ifndef PNG_NO_READ_DITHER
+#    define PNG_READ_DITHER_SUPPORTED
+#  endif
+#endif /* 0 */
+#  ifndef PNG_NO_READ_BACKGROUND
+#    define PNG_READ_BACKGROUND_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_16_TO_8
+#    define PNG_READ_16_TO_8_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_FILLER
+#    define PNG_READ_FILLER_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_GAMMA
+#    define PNG_READ_GAMMA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_GRAY_TO_RGB
+#    define PNG_READ_GRAY_TO_RGB_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SWAP_ALPHA
+#    define PNG_READ_SWAP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_INVERT_ALPHA
+#    define PNG_READ_INVERT_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_STRIP_ALPHA
+#    define PNG_READ_STRIP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_USER_TRANSFORM
+#    define PNG_READ_USER_TRANSFORM_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_RGB_TO_GRAY
+#    define PNG_READ_RGB_TO_GRAY_SUPPORTED
+#  endif
+#endif /* PNG_READ_TRANSFORMS_SUPPORTED */
+
+/* PNG_PROGRESSIVE_READ_NOT_SUPPORTED is deprecated. */
+#if !defined(PNG_NO_PROGRESSIVE_READ) && \
+ !defined(PNG_PROGRESSIVE_READ_NOT_SUPPORTED)  /* if you don't do progressive */
+#  define PNG_PROGRESSIVE_READ_SUPPORTED     /* reading.  This is not talking */
+#endif                               /* about interlacing capability!  You'll */
+            /* still have interlacing unless you change the following define: */
+
+#define PNG_READ_INTERLACING_SUPPORTED /* required for PNG-compliant decoders */
+
+/* PNG_NO_SEQUENTIAL_READ_SUPPORTED is deprecated. */
+#if !defined(PNG_NO_SEQUENTIAL_READ) && \
+    !defined(PNG_SEQUENTIAL_READ_SUPPORTED) && \
+    !defined(PNG_NO_SEQUENTIAL_READ_SUPPORTED)
+#  define PNG_SEQUENTIAL_READ_SUPPORTED
+#endif
+
+#ifndef PNG_NO_READ_COMPOSITE_NODIV
+#  ifndef PNG_NO_READ_COMPOSITED_NODIV  /* libpng-1.0.x misspelling */
+#    define PNG_READ_COMPOSITE_NODIV_SUPPORTED   /* well tested on Intel, SGI */
+#  endif
+#endif
+
+#if !defined(PNG_NO_GET_INT_32) || defined(PNG_READ_oFFs_SUPPORTED) || \
+    defined(PNG_READ_pCAL_SUPPORTED)
+#  ifndef PNG_GET_INT_32_SUPPORTED
+#    define PNG_GET_INT_32_SUPPORTED /* gates png_get_int_32() in png.h */
+#  endif
+#endif /* NOTE: chunk macros are set later in this file; only pre-defined ones count here */
+
+#endif /* PNG_READ_SUPPORTED */
+
+#ifdef PNG_WRITE_SUPPORTED
+
+/* PNG_WRITE_TRANSFORMS_NOT_SUPPORTED is deprecated. */
+#if !defined(PNG_WRITE_TRANSFORMS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_WRITE_TRANSFORMS)
+#  define PNG_WRITE_TRANSFORMS_SUPPORTED
+#endif
+
+#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
+#  ifndef PNG_NO_WRITE_SHIFT
+#    define PNG_WRITE_SHIFT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_PACK
+#    define PNG_WRITE_PACK_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_BGR
+#    define PNG_WRITE_BGR_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_SWAP
+#    define PNG_WRITE_SWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_PACKSWAP
+#    define PNG_WRITE_PACKSWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_INVERT
+#    define PNG_WRITE_INVERT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_FILLER
+#    define PNG_WRITE_FILLER_SUPPORTED   /* same as WRITE_STRIP_ALPHA */
+#  endif
+#  ifndef PNG_NO_WRITE_SWAP_ALPHA
+#    define PNG_WRITE_SWAP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_INVERT_ALPHA
+#    define PNG_WRITE_INVERT_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_USER_TRANSFORM
+#    define PNG_WRITE_USER_TRANSFORM_SUPPORTED
+#  endif
+#endif /* PNG_WRITE_TRANSFORMS_SUPPORTED */
+
+#if !defined(PNG_NO_WRITE_INTERLACING_SUPPORTED) && \
+    !defined(PNG_WRITE_INTERLACING_SUPPORTED)
+    /* This is not required for PNG-compliant encoders, but can cause
+     * trouble if left undefined
+    */
+#  define PNG_WRITE_INTERLACING_SUPPORTED
+#endif
+
+#if !defined(PNG_NO_WRITE_WEIGHTED_FILTER) && \
+    !defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED) && \
+     defined(PNG_FLOATING_POINT_SUPPORTED)
+#  define PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
+#endif /* enabled by default, but only when floating point is supported */
+
+#ifndef PNG_NO_WRITE_FLUSH
+#  define PNG_WRITE_FLUSH_SUPPORTED
+#endif
+
+#if !defined(PNG_NO_SAVE_INT_32) || defined(PNG_WRITE_oFFs_SUPPORTED) || \
+    defined(PNG_WRITE_pCAL_SUPPORTED)
+#  ifndef PNG_SAVE_INT_32_SUPPORTED
+#    define PNG_SAVE_INT_32_SUPPORTED
+#  endif
+#endif /* NOTE: chunk macros are set later in this file; only pre-defined ones count here */
+
+#endif /* PNG_WRITE_SUPPORTED */
+
+#define PNG_NO_ERROR_NUMBERS
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+#  ifndef PNG_NO_USER_TRANSFORM_PTR
+#    define PNG_USER_TRANSFORM_PTR_SUPPORTED
+#  endif
+#endif
+
+#if defined(PNG_STDIO_SUPPORTED) && !defined(PNG_TIME_RFC1123_SUPPORTED)
+#  define PNG_TIME_RFC1123_SUPPORTED
+#endif
+
+/* This adds extra functions in pngget.c for accessing data from the
+ * info pointer (added in version 0.99)
+ * png_get_image_width()
+ * png_get_image_height()
+ * png_get_bit_depth()
+ * png_get_color_type()
+ * png_get_compression_type()
+ * png_get_filter_type()
+ * png_get_interlace_type()
+ * png_get_pixel_aspect_ratio()
+ * png_get_pixels_per_meter()
+ * png_get_x_offset_pixels()
+ * png_get_y_offset_pixels()
+ * png_get_x_offset_microns()
+ * png_get_y_offset_microns()
+ */
+#if !defined(PNG_NO_EASY_ACCESS) && !defined(PNG_EASY_ACCESS_SUPPORTED)
+#  define PNG_EASY_ACCESS_SUPPORTED
+#endif
+
+/* Added at libpng-1.2.0 */
+#if !defined(PNG_NO_USER_MEM) && !defined(PNG_USER_MEM_SUPPORTED)
+#  define PNG_USER_MEM_SUPPORTED
+#endif
+
+/* Added at libpng-1.2.6 */
+#ifndef PNG_NO_SET_USER_LIMITS
+#  ifndef PNG_SET_USER_LIMITS_SUPPORTED
+#    define PNG_SET_USER_LIMITS_SUPPORTED
+#  endif
+  /* Feature added at libpng-1.4.0, this flag added at 1.4.1 */
+#  ifndef PNG_SET_CHUNK_CACHE_LIMIT_SUPPORTED
+#    define PNG_SET_CHUNK_CACHE_LIMIT_SUPPORTED
+#  endif
+  /* Feature added at libpng-1.4.1, this flag added at 1.4.1 */
+#  ifndef PNG_SET_CHUNK_MALLOC_LIMIT_SUPPORTED
+#    define PNG_SET_CHUNK_MALLOC_LIMIT_SUPPORTED
+#  endif
+#endif
+
+/* Added at libpng-1.2.43 */
+#ifndef PNG_USER_LIMITS_SUPPORTED
+#  ifndef PNG_NO_USER_LIMITS
+#    define PNG_USER_LIMITS_SUPPORTED
+#  endif
+#endif
+
+/* Added at libpng-1.0.16 and 1.2.6.  To accept all valid PNGs no matter
+ * how large, set these two limits to 0x7fffffffL
+ */
+#ifndef PNG_USER_WIDTH_MAX
+#  define PNG_USER_WIDTH_MAX 1000000L
+#endif
+#ifndef PNG_USER_HEIGHT_MAX
+#  define PNG_USER_HEIGHT_MAX 1000000L
+#endif
+
+/* Added at libpng-1.2.43.  To accept all valid PNGs no matter
+ * how large, set these two limits to 0.
+ */
+#ifndef PNG_USER_CHUNK_CACHE_MAX
+#  define PNG_USER_CHUNK_CACHE_MAX 0
+#endif
+
+/* Added at libpng-1.2.43 */
+#ifndef PNG_USER_CHUNK_MALLOC_MAX
+#  define PNG_USER_CHUNK_MALLOC_MAX 0
+#endif
+
+/* Added at libpng-1.4.0 */
+#if !defined(PNG_NO_IO_STATE) && !defined(PNG_IO_STATE_SUPPORTED)
+#  define PNG_IO_STATE_SUPPORTED
+#endif
+
+#ifndef PNG_LITERAL_SHARP
+#  define PNG_LITERAL_SHARP 0x23
+#endif
+#ifndef PNG_LITERAL_LEFT_SQUARE_BRACKET
+#  define PNG_LITERAL_LEFT_SQUARE_BRACKET 0x5b
+#endif
+#ifndef PNG_LITERAL_RIGHT_SQUARE_BRACKET
+#  define PNG_LITERAL_RIGHT_SQUARE_BRACKET 0x5d
+#endif
+#ifndef PNG_STRING_NEWLINE
+#define PNG_STRING_NEWLINE "\n"
+#endif
+
+/* These are currently experimental features, define them if you want */
+
+/* Very little testing */
+/*
+#ifdef PNG_READ_SUPPORTED
+#  ifndef PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#    define PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#  endif
+#endif
+*/
+
+/* This is only for PowerPC big-endian and 680x0 systems */
+/* some testing */
+/*
+#ifndef PNG_READ_BIG_ENDIAN_SUPPORTED
+#  define PNG_READ_BIG_ENDIAN_SUPPORTED
+#endif
+*/
+
+#if !defined(PNG_NO_USE_READ_MACROS) && !defined(PNG_USE_READ_MACROS)
+#  define PNG_USE_READ_MACROS
+#endif
+
+/* Buggy compilers (e.g., gcc 2.7.2.2) need PNG_NO_POINTER_INDEXING */
+
+#if !defined(PNG_NO_POINTER_INDEXING) && \
+    !defined(PNG_POINTER_INDEXING_SUPPORTED)
+#  define PNG_POINTER_INDEXING_SUPPORTED
+#endif
+
+
+/* Any chunks you are not interested in, you can undef here.  The
+ * ones that allocate memory may be especially important (hIST,
+ * tEXt, zTXt, tRNS, pCAL).  Others will just save time and make png_info
+ * a bit smaller.
+ */
+
+/* The size of the png_text structure changed in libpng-1.0.6 when
+ * iTXt support was added.  iTXt support was turned off by default through
+ * libpng-1.2.x, to support old apps that malloc the png_text structure
+ * instead of calling png_set_text() and letting libpng malloc it.  It
+ * was turned on by default in libpng-1.4.0.
+ */
+
+/* PNG_READ_ANCILLARY_CHUNKS_NOT_SUPPORTED is deprecated. */
+#if defined(PNG_READ_SUPPORTED) && \
+    !defined(PNG_READ_ANCILLARY_CHUNKS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_READ_ANCILLARY_CHUNKS)
+#  define PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+#endif
+
+/* PNG_WRITE_ANCILLARY_CHUNKS_NOT_SUPPORTED is deprecated. */
+#if defined(PNG_WRITE_SUPPORTED) && \
+    !defined(PNG_WRITE_ANCILLARY_CHUNKS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_WRITE_ANCILLARY_CHUNKS)
+#  define PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
+#endif
+
+#ifdef PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_READ_TEXT
+#  define PNG_NO_READ_iTXt
+#  define PNG_NO_READ_tEXt
+#  define PNG_NO_READ_zTXt
+#endif
+
+#ifndef PNG_NO_READ_bKGD
+#  define PNG_READ_bKGD_SUPPORTED
+#  define PNG_bKGD_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_cHRM
+#  define PNG_READ_cHRM_SUPPORTED
+#  define PNG_cHRM_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_gAMA
+#  define PNG_READ_gAMA_SUPPORTED
+#  define PNG_gAMA_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_hIST
+#  define PNG_READ_hIST_SUPPORTED
+#  define PNG_hIST_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_iCCP
+#  define PNG_READ_iCCP_SUPPORTED
+#  define PNG_iCCP_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_iTXt
+#  ifndef PNG_READ_iTXt_SUPPORTED
+#    define PNG_READ_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_READ_oFFs
+#  define PNG_READ_oFFs_SUPPORTED
+#  define PNG_oFFs_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_pCAL
+#  define PNG_READ_pCAL_SUPPORTED
+#  define PNG_pCAL_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sCAL
+#  define PNG_READ_sCAL_SUPPORTED
+#  define PNG_sCAL_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_pHYs
+#  define PNG_READ_pHYs_SUPPORTED
+#  define PNG_pHYs_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sBIT
+#  define PNG_READ_sBIT_SUPPORTED
+#  define PNG_sBIT_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sPLT
+#  define PNG_READ_sPLT_SUPPORTED
+#  define PNG_sPLT_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sRGB
+#  define PNG_READ_sRGB_SUPPORTED
+#  define PNG_sRGB_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tEXt
+#  define PNG_READ_tEXt_SUPPORTED
+#  define PNG_tEXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tIME
+#  define PNG_READ_tIME_SUPPORTED
+#  define PNG_tIME_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tRNS
+#  define PNG_READ_tRNS_SUPPORTED
+#  define PNG_tRNS_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_zTXt
+#  define PNG_READ_zTXt_SUPPORTED
+#  define PNG_zTXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_OPT_PLTE
+#  define PNG_READ_OPT_PLTE_SUPPORTED /* only affects support of the */
+#endif                      /* optional PLTE chunk in RGB and RGBA images */
+#if defined(PNG_READ_iTXt_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) || \
+    defined(PNG_READ_zTXt_SUPPORTED)
+#  define PNG_READ_TEXT_SUPPORTED
+#  define PNG_TEXT_SUPPORTED
+#endif
+
+#endif /* PNG_READ_ANCILLARY_CHUNKS_SUPPORTED */
+
+#ifndef PNG_NO_READ_UNKNOWN_CHUNKS
+#  ifndef PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_READ_USER_CHUNKS_SUPPORTED
+#    define PNG_READ_USER_CHUNKS_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_READ_USER_CHUNKS
+#  ifndef PNG_READ_USER_CHUNKS_SUPPORTED
+#    define PNG_READ_USER_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_USER_CHUNKS_SUPPORTED
+#    define PNG_USER_CHUNKS_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_HANDLE_AS_UNKNOWN
+#  ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#    define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#  endif
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+#ifdef PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_WRITE_TEXT
+#  define PNG_NO_WRITE_iTXt
+#  define PNG_NO_WRITE_tEXt
+#  define PNG_NO_WRITE_zTXt
+#endif
+#ifndef PNG_NO_WRITE_bKGD
+#  define PNG_WRITE_bKGD_SUPPORTED
+#  ifndef PNG_bKGD_SUPPORTED
+#    define PNG_bKGD_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_cHRM
+#  define PNG_WRITE_cHRM_SUPPORTED
+#  ifndef PNG_cHRM_SUPPORTED
+#    define PNG_cHRM_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_gAMA
+#  define PNG_WRITE_gAMA_SUPPORTED
+#  ifndef PNG_gAMA_SUPPORTED
+#    define PNG_gAMA_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_hIST
+#  define PNG_WRITE_hIST_SUPPORTED
+#  ifndef PNG_hIST_SUPPORTED
+#    define PNG_hIST_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_iCCP
+#  define PNG_WRITE_iCCP_SUPPORTED
+#  ifndef PNG_iCCP_SUPPORTED
+#    define PNG_iCCP_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_iTXt
+#  ifndef PNG_WRITE_iTXt_SUPPORTED
+#    define PNG_WRITE_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_oFFs
+#  define PNG_WRITE_oFFs_SUPPORTED
+#  ifndef PNG_oFFs_SUPPORTED
+#    define PNG_oFFs_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_pCAL
+#  define PNG_WRITE_pCAL_SUPPORTED
+#  ifndef PNG_pCAL_SUPPORTED
+#    define PNG_pCAL_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sCAL
+#  define PNG_WRITE_sCAL_SUPPORTED
+#  ifndef PNG_sCAL_SUPPORTED
+#    define PNG_sCAL_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_pHYs
+#  define PNG_WRITE_pHYs_SUPPORTED
+#  ifndef PNG_pHYs_SUPPORTED
+#    define PNG_pHYs_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sBIT
+#  define PNG_WRITE_sBIT_SUPPORTED
+#  ifndef PNG_sBIT_SUPPORTED
+#    define PNG_sBIT_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sPLT
+#  define PNG_WRITE_sPLT_SUPPORTED
+#  ifndef PNG_sPLT_SUPPORTED
+#    define PNG_sPLT_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sRGB
+#  define PNG_WRITE_sRGB_SUPPORTED
+#  ifndef PNG_sRGB_SUPPORTED
+#    define PNG_sRGB_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tEXt
+#  define PNG_WRITE_tEXt_SUPPORTED
+#  ifndef PNG_tEXt_SUPPORTED
+#    define PNG_tEXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tIME
+#  define PNG_WRITE_tIME_SUPPORTED
+#  ifndef PNG_tIME_SUPPORTED
+#    define PNG_tIME_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tRNS
+#  define PNG_WRITE_tRNS_SUPPORTED
+#  ifndef PNG_tRNS_SUPPORTED
+#    define PNG_tRNS_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_zTXt
+#  define PNG_WRITE_zTXt_SUPPORTED
+#  ifndef PNG_zTXt_SUPPORTED
+#    define PNG_zTXt_SUPPORTED
+#  endif
+#endif
+#if defined(PNG_WRITE_iTXt_SUPPORTED) || defined(PNG_WRITE_tEXt_SUPPORTED) || \
+    defined(PNG_WRITE_zTXt_SUPPORTED)
+#  define PNG_WRITE_TEXT_SUPPORTED
+#  ifndef PNG_TEXT_SUPPORTED
+#    define PNG_TEXT_SUPPORTED
+#  endif
+#endif
+
+#ifdef PNG_WRITE_tIME_SUPPORTED
+#  ifndef PNG_NO_CONVERT_tIME
+#    ifndef _WIN32_WCE
+/*   The "tm" structure is not supported on WindowsCE */
+#      ifndef PNG_CONVERT_tIME_SUPPORTED
+#        define PNG_CONVERT_tIME_SUPPORTED
+#      endif
+#   endif
+#  endif
+#endif
+
+#endif /* PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED */
+
+#ifndef PNG_NO_WRITE_FILTER
+#  ifndef PNG_WRITE_FILTER_SUPPORTED
+#    define PNG_WRITE_FILTER_SUPPORTED
+#  endif
+#endif
+
+#ifndef PNG_NO_WRITE_UNKNOWN_CHUNKS
+#  define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_HANDLE_AS_UNKNOWN
+#  ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#    define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#  endif
+#endif
+#endif /* PNG_WRITE_SUPPORTED */
+
+/* Turn this off to disable png_read_png() and
+ * png_write_png() and leave the row_pointers member
+ * out of the info structure.
+ */
+#ifndef PNG_NO_INFO_IMAGE
+#  define PNG_INFO_IMAGE_SUPPORTED
+#endif
+
+/* Need the time information for converting tIME chunks */
+#ifdef PNG_CONVERT_tIME_SUPPORTED
+     /* "time.h" functions are not supported on WindowsCE */
+#    include <time.h>
+#endif
+
+/* Some typedefs to get us started.  These should be safe on most of the
+ * common platforms.  The typedefs should be at least as large as the
+ * numbers suggest (a png_uint_32 must be at least 32 bits long), but they
+ * don't have to be exactly that size.  Some compilers dislike passing
+ * unsigned shorts as function parameters, so you may be better off using
+ * unsigned int for png_uint_16.
+ */
+
+#if defined(INT_MAX) && (INT_MAX > 0x7ffffffeL)
+typedef unsigned int png_uint_32;
+typedef int png_int_32;
+#else
+typedef unsigned long png_uint_32;
+typedef long png_int_32;
+#endif
+typedef unsigned short png_uint_16;
+typedef short png_int_16;
+typedef unsigned char png_byte;
+
+#ifdef PNG_NO_SIZE_T
+   typedef unsigned int png_size_t;
+#else
+   typedef size_t png_size_t;
+#endif
+#define png_sizeof(x) sizeof(x)
+
+/* The following is needed for medium model support.  It cannot be in the
+ * pngpriv.h header.  Needs modification for other compilers besides
+ * MSC.  Model independent support declares all arrays and pointers to be
+ * large using the far keyword.  The zlib version used must also support
+ * model independent data.  As of version zlib 1.0.4, the necessary changes
+ * have been made in zlib.  The USE_FAR_KEYWORD define triggers other
+ * changes that are needed. (Tim Wegner)
+ */
+
+/* Separate compiler dependencies (problem here is that zlib.h always
+ * defines FAR). (SJT)
+ */
+#ifdef __BORLANDC__
+#  if defined(__LARGE__) || defined(__HUGE__) || defined(__COMPACT__)
+#    define LDATA 1
+#  else
+#    define LDATA 0
+#  endif
+   /* GRR:  why is Cygwin in here?  Cygwin is not Borland C... */
+#  if !defined(__WIN32__) && !defined(__FLAT__) && !defined(__CYGWIN__)
+#    define PNG_MAX_MALLOC_64K
+#    if (LDATA != 1)
+#      ifndef FAR
+#        define FAR __far
+#      endif
+#      define USE_FAR_KEYWORD
+#    endif   /* LDATA != 1 */
+     /* Possibly useful for moving data out of default segment.
+      * Uncomment it if you want. Could also define FARDATA as
+      * const if your compiler supports it. (SJT)
+#    define FARDATA FAR
+      */
+#  endif  /* __WIN32__, __FLAT__, __CYGWIN__ */
+#endif   /* __BORLANDC__ */
+
+
+/* Suggest testing for specific compiler first before testing for
+ * FAR.  The Watcom compiler defines both __MEDIUM__ and M_I86MM,
+ * making reliance on certain keywords suspect. (SJT)
+ */
+
+/* MSC Medium model */
+#ifdef FAR
+#  ifdef M_I86MM
+#    define USE_FAR_KEYWORD
+#    define FARDATA FAR
+#    include <dos.h>
+#  endif
+#endif
+
+/* SJT: default case */
+#ifndef FAR
+#  define FAR
+#endif
+
+/* At this point FAR is always defined */
+#ifndef FARDATA
+#  define FARDATA
+#endif
+
+/* Typedef for floating-point numbers that are converted
+   to fixed-point with a multiple of 100,000, e.g., int_gamma */
+typedef png_int_32 png_fixed_point;
+
+/* Add typedefs for pointers */
+typedef void            FAR * png_voidp;
+typedef png_byte        FAR * png_bytep;
+typedef png_uint_32     FAR * png_uint_32p;
+typedef png_int_32      FAR * png_int_32p;
+typedef png_uint_16     FAR * png_uint_16p;
+typedef png_int_16      FAR * png_int_16p;
+typedef PNG_CONST char  FAR * png_const_charp;
+typedef char            FAR * png_charp;
+typedef png_fixed_point FAR * png_fixed_point_p;
+
+#ifndef PNG_NO_STDIO
+typedef FILE                * png_FILE_p;
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          FAR * png_doublep;
+#endif
+
+/* Pointers to pointers; i.e. arrays */
+typedef png_byte        FAR * FAR * png_bytepp;
+typedef png_uint_32     FAR * FAR * png_uint_32pp;
+typedef png_int_32      FAR * FAR * png_int_32pp;
+typedef png_uint_16     FAR * FAR * png_uint_16pp;
+typedef png_int_16      FAR * FAR * png_int_16pp;
+typedef PNG_CONST char  FAR * FAR * png_const_charpp;
+typedef char            FAR * FAR * png_charpp;
+typedef png_fixed_point FAR * FAR * png_fixed_point_pp;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          FAR * FAR * png_doublepp;
+#endif
+
+/* Pointers to pointers to pointers; i.e., pointer to array */
+typedef char            FAR * FAR * FAR * png_charppp;
+
+/* Define PNG_BUILD_DLL if the module being built is a Windows
+ * LIBPNG DLL.
+ *
+ * Define PNG_USE_DLL if you want to *link* to the Windows LIBPNG DLL.
+ * It is equivalent to Microsoft predefined macro _DLL that is
+ * automatically defined when you compile using the shared
+ * version of the CRT (C Run-Time library)
+ *
+ * The cygwin mods make this behavior a little different:
+ * Define PNG_BUILD_DLL if you are building a dll for use with cygwin
+ * Define PNG_STATIC if you are building a static library for use with cygwin,
+ *   -or- if you are building an application that you want to link to the
+ *   static library.
+ * PNG_USE_DLL is defined by default (no user action needed) unless one of
+ *   the other flags is defined.
+ */
+
+#if !defined(PNG_DLL) && (defined(PNG_BUILD_DLL) || defined(PNG_USE_DLL))
+#  define PNG_DLL
+#endif
+
+#ifdef __CYGWIN__
+#  undef PNGAPI
+#  define PNGAPI __cdecl
+#  undef PNG_IMPEXP
+#  define PNG_IMPEXP
+#endif
+
+#define PNG_USE_LOCAL_ARRAYS /* Not used in libpng, defined for legacy apps */
+
+/* If you define PNGAPI, e.g., with compiler option "-DPNGAPI=__stdcall",
+ * you may get warnings regarding the linkage of png_zalloc and png_zfree.
+ * Don't ignore those warnings; you must also reset the default calling
+ * convention in your compiler to match your PNGAPI, and you must build
+ * zlib and your applications the same way you build libpng.
+ */
+
+#if defined(__MINGW32__) && !defined(PNG_MODULEDEF)
+#  ifndef PNG_NO_MODULEDEF
+#    define PNG_NO_MODULEDEF
+#  endif
+#endif
+
+#if !defined(PNG_IMPEXP) && defined(PNG_BUILD_DLL) && !defined(PNG_NO_MODULEDEF)
+#  define PNG_IMPEXP
+#endif
+
+#if defined(PNG_DLL) || defined(_DLL) || defined(__DLL__ ) || \
+    (( defined(_Windows) || defined(_WINDOWS) || \
+       defined(WIN32) || defined(_WIN32) || defined(__WIN32__) ))
+
+#  ifndef PNGAPI
+#     if defined(__GNUC__) || (defined (_MSC_VER) && (_MSC_VER >= 800))
+#        define PNGAPI __cdecl
+#     else
+#        define PNGAPI _cdecl
+#     endif
+#  endif
+
+#  if !defined(PNG_IMPEXP) && (!defined(PNG_DLL) || \
+       0 /* WINCOMPILER_WITH_NO_SUPPORT_FOR_DECLIMPEXP */)
+#     define PNG_IMPEXP
+#  endif
+
+#  ifndef PNG_IMPEXP
+
+#    define PNG_EXPORT_TYPE1(type,symbol)  PNG_IMPEXP type PNGAPI symbol
+#    define PNG_EXPORT_TYPE2(type,symbol)  type PNG_IMPEXP PNGAPI symbol
+
+     /* Borland/Microsoft */
+#    if defined(_MSC_VER) || defined(__BORLANDC__)
+#      if (_MSC_VER >= 800) || (__BORLANDC__ >= 0x500)
+#         define PNG_EXPORT PNG_EXPORT_TYPE1
+#      else
+#         define PNG_EXPORT PNG_EXPORT_TYPE2
+#         ifdef PNG_BUILD_DLL
+#            define PNG_IMPEXP __export
+#         else
+#            define PNG_IMPEXP /*__import */ /* doesn't exist AFAIK in VC++ */
+#         endif                              /* Exists in Borland C++ for
+                                                C++ classes (== huge) */
+#      endif
+#    endif
+
+#    ifndef PNG_IMPEXP
+#      ifdef PNG_BUILD_DLL
+#        define PNG_IMPEXP __declspec(dllexport)
+#      else
+#        define PNG_IMPEXP __declspec(dllimport)
+#      endif
+#    endif
+#  endif  /* PNG_IMPEXP */
+#else /* !(DLL || non-cygwin WINDOWS) */
+#   if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__)
+#     ifndef PNGAPI
+#       define PNGAPI _System
+#     endif
+#   else
+#     if 0 /* ... other platforms, with other meanings */
+#     endif
+#   endif
+#endif
+
+#ifndef PNGAPI
+#  define PNGAPI
+#endif
+#ifndef PNG_IMPEXP
+#  define PNG_IMPEXP
+#endif
+
+#ifdef PNG_BUILDSYMS
+#  ifndef PNG_EXPORT
+#    define PNG_EXPORT(type,symbol) PNG_FUNCTION_EXPORT symbol END
+#  endif
+#endif
+
+#ifndef PNG_EXPORT
+#  define PNG_EXPORT(type,symbol) PNG_IMPEXP type PNGAPI symbol
+#endif
+
+/* Support for compiler specific function attributes.  These are used
+ * so that where compiler support is available incorrect use of API
+ * functions in png.h will generate compiler warnings.
+ *
+ * Added at libpng-1.2.41.
+ */
+
+#ifndef PNG_NO_PEDANTIC_WARNINGS
+#  ifndef PNG_PEDANTIC_WARNINGS_SUPPORTED
+#    define PNG_PEDANTIC_WARNINGS_SUPPORTED
+#  endif
+#endif
+
+#ifdef PNG_PEDANTIC_WARNINGS_SUPPORTED
+/* Support for compiler specific function attributes.  These are used
+ * so that where compiler support is available incorrect use of API
+ * functions in png.h will generate compiler warnings.  Added at libpng
+ * version 1.2.41.
+ */
+#  ifdef __GNUC__
+#    ifndef PNG_USE_RESULT
+#      define PNG_USE_RESULT __attribute__((__warn_unused_result__))
+#    endif
+#    ifndef PNG_NORETURN
+#      define PNG_NORETURN   __attribute__((__noreturn__))
+#    endif
+#    ifndef PNG_ALLOCATED
+#      define PNG_ALLOCATED  __attribute__((__malloc__))
+#    endif
+
+    /* PNG_DEPSTRUCT (defined below) specifically protects structure
+     * members that should only be accessed from within the library,
+     * therefore it should be empty during a library build.
+     */
+#    ifndef PNG_DEPRECATED
+#      define PNG_DEPRECATED __attribute__((__deprecated__))
+#    endif
+#    ifndef PNG_DEPSTRUCT
+#      define PNG_DEPSTRUCT  __attribute__((__deprecated__))
+#    endif
+#    ifndef PNG_PRIVATE
+#      if 0 /* Doesn't work so we use deprecated instead*/
+#        define PNG_PRIVATE \
+          __attribute__((warning("This function is not exported by libpng.")))
+#      else
+#        define PNG_PRIVATE \
+          __attribute__((__deprecated__))
+#      endif
+#    endif /* PNG_PRIVATE */
+#  endif /* __GNUC__ */
+#endif /* PNG_PEDANTIC_WARNINGS_SUPPORTED */
+
+#ifndef PNG_DEPRECATED
+#  define PNG_DEPRECATED  /* Use of this function is deprecated */
+#endif
+#ifndef PNG_USE_RESULT
+#  define PNG_USE_RESULT  /* The result of this function must be checked */
+#endif
+#ifndef PNG_NORETURN
+#  define PNG_NORETURN    /* This function does not return */
+#endif
+#ifndef PNG_ALLOCATED
+#  define PNG_ALLOCATED   /* The result of the function is new memory */
+#endif
+#ifndef PNG_DEPSTRUCT
+#  define PNG_DEPSTRUCT   /* Access to this struct member is deprecated */
+#endif
+#ifndef PNG_PRIVATE
+#  define PNG_PRIVATE     /* This is a private libpng function */
+#endif
+
+/* Users may want to use these so they are not private.  Any library
+ * functions that are passed far data must be model-independent.
+ */
+
+/* memory model/platform independent fns */
+#ifndef PNG_ABORT
+#  ifdef _WINDOWS_
+#     define PNG_ABORT() ExitProcess(0)
+#  else
+#     define PNG_ABORT() abort()
+#  endif
+#endif
+
+#ifdef USE_FAR_KEYWORD
+/* Use this to make far-to-near assignments */
+#  define CHECK   1
+#  define NOCHECK 0
+#  define CVT_PTR(ptr) (png_far_to_near(png_ptr,ptr,CHECK))
+#  define CVT_PTR_NOCHECK(ptr) (png_far_to_near(png_ptr,ptr,NOCHECK))
+#  define png_strcpy  _fstrcpy
+#  define png_strncpy _fstrncpy   /* Added to v 1.2.6 */
+#  define png_strlen  _fstrlen
+#  define png_memcmp  _fmemcmp    /* SJT: added */
+#  define png_memcpy  _fmemcpy
+#  define png_memset  _fmemset
+#  define png_sprintf sprintf
+#else /* !USE_FAR_KEYWORD */
+#  ifdef _WINDOWS_  /* Favor Windows over C runtime fns */
+#    define CVT_PTR(ptr)         (ptr)
+#    define CVT_PTR_NOCHECK(ptr) (ptr)
+#    define png_strcpy  lstrcpyA
+#    define png_strncpy lstrcpynA
+#    define png_strlen  lstrlenA
+#    define png_memcmp  memcmp
+#    define png_memcpy  CopyMemory
+#    define png_memset  memset
+#    define png_sprintf wsprintfA
+#  else /* !_WINDOWS_: plain C runtime; only branch here defining png_snprintf* */
+#    define CVT_PTR(ptr)         (ptr)
+#    define CVT_PTR_NOCHECK(ptr) (ptr)
+#    define png_strcpy  strcpy
+#    define png_strncpy strncpy     /* Added to v 1.2.6 */
+#    define png_strlen  strlen
+#    define png_memcmp  memcmp      /* SJT: added */
+#    define png_memcpy  memcpy
+#    define png_memset  memset
+#    define png_sprintf sprintf
+#    ifndef PNG_NO_SNPRINTF
+#      ifdef _MSC_VER
+#        define png_snprintf _snprintf   /* Added to v 1.2.19 */
+#        define png_snprintf2 _snprintf
+#        define png_snprintf6 _snprintf
+#      else /* !_MSC_VER */
+#        define png_snprintf snprintf   /* Added to v 1.2.19 */
+#        define png_snprintf2 snprintf
+#        define png_snprintf6 snprintf
+#      endif /* _MSC_VER */
+#    else /* PNG_NO_SNPRINTF */
+       /* You don't have or don't want to use snprintf().  Caution: Using
+        * sprintf instead of snprintf exposes your application to accidental
+        * or malevolent buffer overflows.  If you don't have snprintf()
+        * as a general rule you should provide one (you can get one from
+        * Portable OpenSSH).
+        */
+#      define png_snprintf(s1,n,fmt,x1) sprintf(s1,fmt,x1)
+#      define png_snprintf2(s1,n,fmt,x1,x2) sprintf(s1,fmt,x1,x2)
+#      define png_snprintf6(s1,n,fmt,x1,x2,x3,x4,x5,x6) \
+          sprintf(s1,fmt,x1,x2,x3,x4,x5,x6)
+#    endif /* PNG_NO_SNPRINTF */
+#  endif /* _WINDOWS_ */
+#endif /* USE_FAR_KEYWORD */
+
+/* png_alloc_size_t is guaranteed to be no smaller than png_size_t,
+ * and no smaller than png_uint_32.  Casts from png_size_t or png_uint_32
+ * to png_alloc_size_t are not necessary; in fact, it is recommended
+ * not to use them at all so that the compiler can complain when something
+ * turns out to be problematic.
+ * Casts in the other direction (from png_alloc_size_t to png_size_t or
+ * png_uint_32) should be explicitly applied; however, we do not expect
+ * to encounter practical situations that require such conversions.
+ */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+#  define  png_mem_alloc farmalloc
+#  define  png_mem_free  farfree
+   typedef unsigned long png_alloc_size_t;
+#else /* !(__TURBOC__ && !__FLAT__) */
+#  if defined(_MSC_VER) && defined(MAXSEG_64K)
+#    define  png_mem_alloc(s) halloc(s, 1)
+#    define  png_mem_free     hfree
+     typedef unsigned long    png_alloc_size_t;
+#  else /* !(_MSC_VER && MAXSEG_64K) */
+#    if defined(_WINDOWS_) && (!defined(INT_MAX) || INT_MAX <= 0x7ffffffeL)
+#      define  png_mem_alloc(s) HeapAlloc(GetProcessHeap(), 0, s)
+#      define  png_mem_free(p)  HeapFree(GetProcessHeap(), 0, p)
+       typedef DWORD            png_alloc_size_t;
+#    else /* default: C runtime malloc/free */
+#      define  png_mem_alloc malloc
+#      define  png_mem_free  free
+       typedef png_size_t    png_alloc_size_t;
+#    endif /* _WINDOWS_ && small or undefined INT_MAX */
+#  endif /* _MSC_VER && MAXSEG_64K */
+#endif /* __TURBOC__ && !__FLAT__ */
+/* End of memory model/platform independent support */
+
+/* Just a little check that someone hasn't tried to define something
+ * contradictory.
+ */
+#if (PNG_ZBUF_SIZE > 65536L) && defined(PNG_MAX_MALLOC_64K)
+#  undef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 65536L
+#endif
+
+
+/* Added at libpng-1.2.8 */
+#endif /* PNG_VERSION_INFO_ONLY */
+
+#endif /* PNGCONF_H */
diff --git a/GLideNHQ/inc/zconf.h b/GLideNHQ/inc/zconf.h
new file mode 100644
index 00000000..03a9431c
--- /dev/null
+++ b/GLideNHQ/inc/zconf.h
@@ -0,0 +1,332 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ */
+#ifdef Z_PREFIX
+#  define deflateInit_          z_deflateInit_
+#  define deflate               z_deflate
+#  define deflateEnd            z_deflateEnd
+#  define inflateInit_          z_inflateInit_
+#  define inflate               z_inflate
+#  define inflateEnd            z_inflateEnd
+#  define deflateInit2_         z_deflateInit2_
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateCopy           z_deflateCopy
+#  define deflateReset          z_deflateReset
+#  define deflateParams         z_deflateParams
+#  define deflateBound          z_deflateBound
+#  define deflatePrime          z_deflatePrime
+#  define inflateInit2_         z_inflateInit2_
+#  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateSync           z_inflateSync
+#  define inflateSyncPoint      z_inflateSyncPoint
+#  define inflateCopy           z_inflateCopy
+#  define inflateReset          z_inflateReset
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define compress              z_compress
+#  define compress2             z_compress2
+#  define compressBound         z_compressBound
+#  define uncompress            z_uncompress
+#  define adler32               z_adler32
+#  define crc32                 z_crc32
+#  define get_crc_table         z_get_crc_table
+#  define zError                z_zError
+
+#  define alloc_func            z_alloc_func
+#  define free_func             z_free_func
+#  define in_func               z_in_func
+#  define out_func              z_out_func
+#  define Byte                  z_Byte
+#  define uInt                  z_uInt
+#  define uLong                 z_uLong
+#  define Bytef                 z_Bytef
+#  define charf                 z_charf
+#  define intf                  z_intf
+#  define uIntf                 z_uIntf
+#  define uLongf                z_uLongf
+#  define voidpf                z_voidpf
+#  define voidp                 z_voidp
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+#  define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+#  define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+#  ifndef WIN32
+#    define WIN32
+#  endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+#    ifndef SYS16BIT
+#      define SYS16BIT
+#    endif
+#  endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+#  ifndef STDC
+#    define STDC
+#  endif
+#  if __STDC_VERSION__ >= 199901L
+#    ifndef STDC99
+#      define STDC99
+#    endif
+#  endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+#  define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
+#  define STDC
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const       /* note: need a more gentle solution here */
+#  endif
+#endif
+
+/* Some Mac compilers merge all .h files incorrectly: */
+#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__)
+#  define NO_DUMMY_DECL
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+#  if defined(M_I86SM) || defined(M_I86MM)
+     /* MSC small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef _MSC_VER
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#  if (defined(__SMALL__) || defined(__MEDIUM__))
+     /* Turbo C small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef __BORLANDC__
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
+#  ifdef ZLIB_DLL
+#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+#      ifdef ZLIB_INTERNAL
+#        define ZEXTERN extern __declspec(dllexport)
+#      else /* !ZLIB_INTERNAL */
+#        define ZEXTERN extern __declspec(dllimport)
+#      endif /* ZLIB_INTERNAL */
+#    endif /* WIN32 && (!__BORLANDC__ || __BORLANDC__ >= 0x500) */
+#  endif  /* ZLIB_DLL */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
+#  ifdef ZLIB_WINAPI
+#    ifdef FAR
+#      undef FAR
+#    endif /* FAR */
+#    include <windows.h>
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+#    define ZEXPORT WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA WINAPIV
+#    else /* !WIN32 */
+#      define ZEXPORTVA FAR CDECL
+#    endif /* WIN32 */
+#  endif /* ZLIB_WINAPI */
+#endif /* WINDOWS || WIN32 */
+
+#if defined (__BEOS__)
+#  ifdef ZLIB_DLL
+#    ifdef ZLIB_INTERNAL
+#      define ZEXPORT   __declspec(dllexport)
+#      define ZEXPORTVA __declspec(dllexport)
+#    else
+#      define ZEXPORT   __declspec(dllimport)
+#      define ZEXPORTVA __declspec(dllimport)
+#    endif
+#  endif
+#endif
+
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+
+#ifndef FAR
+#  define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif
+typedef char  FAR charf;
+typedef int   FAR intf;
+typedef uInt  FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if 0           /* HAVE_UNISTD_H -- this line is updated by ./configure */
+#  include <sys/types.h> /* for off_t */
+#  include <unistd.h>    /* for SEEK_* and off_t */
+#  ifdef VMS
+#    include <unixio.h>   /* for off_t */
+#  endif
+#  define z_off_t off_t
+#endif
+#ifndef SEEK_SET
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+#ifndef z_off_t
+#  define z_off_t long
+#endif
+
+#if defined(__OS400__)
+#  define NO_vsnprintf
+#endif
+
+#if defined(__MVS__)
+#  define NO_vsnprintf
+#  ifdef FAR
+#    undef FAR
+#  endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+#   pragma map(deflateInit_,"DEIN")
+#   pragma map(deflateInit2_,"DEIN2")
+#   pragma map(deflateEnd,"DEEND")
+#   pragma map(deflateBound,"DEBND")
+#   pragma map(inflateInit_,"ININ")
+#   pragma map(inflateInit2_,"ININ2")
+#   pragma map(inflateEnd,"INEND")
+#   pragma map(inflateSync,"INSY")
+#   pragma map(inflateSetDictionary,"INSEDI")
+#   pragma map(compressBound,"CMBND")
+#   pragma map(inflate_table,"INTABL")
+#   pragma map(inflate_fast,"INFA")
+#   pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/GLideNHQ/inc/zlib.h b/GLideNHQ/inc/zlib.h
new file mode 100644
index 00000000..02281792
--- /dev/null
+++ b/GLideNHQ/inc/zlib.h
@@ -0,0 +1,1357 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+  version 1.2.3, July 18th, 2005
+
+  Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+
+  The data format used by the zlib library is described by RFCs (Request for
+  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
+  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.3"
+#define ZLIB_VERNUM 0x1230
+
+/*
+     The 'zlib' compression library provides in-memory compression and
+  decompression functions, including integrity checks of the uncompressed
+  data.  This version of the library supports only one compression method
+  (deflation) but other algorithms will be added later and will have the same
+  stream interface.
+
+     Compression can be done in a single step if the buffers are large
+  enough (for example if an input file is mmap'ed), or can be done by
+  repeated calls of the compression function.  In the latter case, the
+  application must provide more input and/or consume the output
+  (providing more output space) before each call.
+
+     The compressed data format used by default by the in-memory functions is
+  the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+  around a deflate stream, which is itself documented in RFC 1951.
+
+     The library also supports reading and writing files in gzip (.gz) format
+  with an interface similar to that of stdio using the functions that start
+  with "gz".  The gzip format is different from the zlib format.  gzip is a
+  gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+     This library can optionally read and write gzip streams in memory as well.
+
+     The zlib format was designed to be compact and fast for use in memory
+  and on communications channels.  The gzip format was designed for single-
+  file compression on file systems, has a larger header than zlib to maintain
+  directory information, and uses a different, slower check method than zlib.
+
+     The library does not install any signal handler. The decoder checks
+  the consistency of the compressed data, so the library should never
+  crash even in case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+
+struct internal_state; /* forward declaration for the z_stream.state pointer */
+
+typedef struct z_stream_s {
+    Bytef    *next_in;  /* next input byte */
+    uInt     avail_in;  /* number of bytes available at next_in */
+    uLong    total_in;  /* total number of input bytes read so far */
+
+    Bytef    *next_out; /* next output byte should be put there */
+    uInt     avail_out; /* remaining free space at next_out */
+    uLong    total_out; /* total number of bytes output so far */
+
+    char     *msg;      /* last error message, NULL if no error */
+    struct internal_state FAR *state; /* not visible by applications */
+
+    alloc_func zalloc;  /* used to allocate the internal state */
+    free_func  zfree;   /* used to free the internal state */
+    voidpf     opaque;  /* private data object passed to zalloc and zfree */
+
+    int     data_type;  /* best guess about the data type: binary or text */
+    uLong   adler;      /* adler32 value of the uncompressed data */
+    uLong   reserved;   /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp; /* stream argument type of the functions below */
+
+/*
+     gzip header information passed to and from zlib routines.  See RFC 1952
+  for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+    int     text;       /* true if compressed data believed to be text */
+    uLong   time;       /* modification time */
+    int     xflags;     /* extra flags (not used when writing a gzip file) */
+    int     os;         /* operating system */
+    Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
+    uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
+    uInt    extra_max;  /* space at extra (only when reading header) */
+    Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
+    uInt    name_max;   /* space at name (only when reading header) */
+    Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
+    uInt    comm_max;   /* space at comment (only when reading header) */
+    int     hcrc;       /* true if there was or will be a header CRC */
+    int     done;       /* true when done reading gzip header (not used
+                           when writing a gzip file) */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp; /* as taken by deflateSetHeader() */
+
+/*
+   The application must update next_in and avail_in when avail_in has
+   dropped to zero. It must update next_out and avail_out when avail_out
+   has dropped to zero. The application must initialize zalloc, zfree and
+   opaque before calling the init function. All other fields are set by the
+   compression library and must not be updated by the application.
+
+   The opaque value provided by the application will be passed as the first
+   parameter for calls of zalloc and zfree. This can be useful for custom
+   memory management. The compression library attaches no meaning to the
+   opaque value.
+
+   zalloc must return Z_NULL if there is not enough memory for the object.
+   If zlib is used in a multi-threaded application, zalloc and zfree must be
+   thread safe.
+
+   On 16-bit systems, the functions zalloc and zfree must be able to allocate
+   exactly 65536 bytes, but will not be required to allocate more than this
+   if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
+   pointers returned by zalloc for objects of exactly 65536 bytes *must*
+   have their offset normalized to zero. The default allocation function
+   provided by this library ensures this (see zutil.c). To reduce memory
+   requirements and avoid any allocation of 64K objects, at the expense of
+   compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
+
+   The fields total_in and total_out can be used for statistics or
+   progress reports. After compression, total_in holds the total size of
+   the uncompressed data and may be saved for use in the decompressor
+   (particularly if the decompressor wants to decompress everything in
+   a single step).
+*/
+
+                        /* constants */
+
+#define Z_NO_FLUSH      0 /* let deflate decide when to produce output */
+#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
+#define Z_SYNC_FLUSH    2 /* flush pending output, align on a byte boundary */
+#define Z_FULL_FLUSH    3 /* as Z_SYNC_FLUSH, plus reset compression state */
+#define Z_FINISH        4 /* process pending input, flush pending output */
+#define Z_BLOCK         5 /* inflate(): stop at next deflate block boundary */
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK            0 /* success, or some progress has been made */
+#define Z_STREAM_END    1 /* end of stream reached, all output produced */
+#define Z_NEED_DICT     2 /* inflate() needs a preset dictionary */
+#define Z_ERRNO        (-1)
+#define Z_STREAM_ERROR (-2) /* stream state inconsistent or bad parameter */
+#define Z_DATA_ERROR   (-3) /* inflate(): input data was corrupted */
+#define Z_MEM_ERROR    (-4) /* not enough memory */
+#define Z_BUF_ERROR    (-5) /* no progress possible; not fatal */
+#define Z_VERSION_ERROR (-6) /* library version incompatible with caller's */
+/* Return codes for the compression/decompression functions. Negative
+ * values are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION         0
+#define Z_BEST_SPEED             1
+#define Z_BEST_COMPRESSION       9
+#define Z_DEFAULT_COMPRESSION  (-1) /* currently equivalent to level 6 */
+/* compression levels */
+
+#define Z_FILTERED            1 /* data produced by a filter (or predictor) */
+#define Z_HUFFMAN_ONLY        2 /* Huffman encoding only, no string match */
+#define Z_RLE                 3 /* limit match distances to one (run-length) */
+#define Z_FIXED               4 /* prevent use of dynamic Huffman codes */
+#define Z_DEFAULT_STRATEGY    0 /* for normal data */
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY   0
+#define Z_TEXT     1
+#define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN  2
+/* Possible values of the data_type field (though see inflate()) */
+
+#define Z_DEFLATED   8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+                        /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+   If the first character differs, the library code actually used is
+   not compatible with the zlib.h header file used by the application.
+   This check is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+     Initializes the internal stream state for compression. The fields
+   zalloc, zfree and opaque must be initialized before by the caller.
+   If zalloc and zfree are set to Z_NULL, deflateInit updates them to
+   use default allocation functions.
+
+     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+   1 gives best speed, 9 gives best compression, 0 gives no compression at
+   all (the input data is simply copied a block at a time).
+   Z_DEFAULT_COMPRESSION requests a default compromise between speed and
+   compression (currently equivalent to level 6).
+
+     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if level is not a valid compression level,
+   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+   with the version assumed by the caller (ZLIB_VERSION).
+   msg is set to null if there is no error message.  deflateInit does not
+   perform any compression: this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+    deflate compresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full. It may introduce some
+  output latency (reading input without producing any output) except when
+  forced to flush.
+
+    The detailed semantics are as follows. deflate performs one or both of the
+  following actions:
+
+  - Compress more input starting at next_in and update next_in and avail_in
+    accordingly. If not all input can be processed (because there is not
+    enough room in the output buffer), next_in and avail_in are updated and
+    processing will resume at this point for the next call of deflate().
+
+  - Provide more output starting at next_out and update next_out and avail_out
+    accordingly. This action is forced if the parameter flush is non zero.
+    Forcing flush frequently degrades the compression ratio, so this parameter
+    should be set only when necessary (in interactive applications).
+    Some output may be provided even if flush is not set.
+
+  Before the call of deflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming
+  more output, and updating avail_in or avail_out accordingly; avail_out
+  should never be zero before the call. The application can consume the
+  compressed output when it wants, for example when the output buffer is full
+  (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
+  and with zero avail_out, it must be called again after making room in the
+  output buffer because there might be more output pending.
+
+    Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+  decide how much data to accumulate before producing output, in order to
+  maximize compression.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+  flushed to the output buffer and the output is aligned on a byte boundary, so
+  that the decompressor can get all input data available so far. (In particular
+  avail_in is zero after the call if enough output space has been provided
+  before the call.)  Flushing may degrade compression for some compression
+  algorithms and so it should be used only when necessary.
+
+    If flush is set to Z_FULL_FLUSH, all output is flushed as with
+  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+  restart from this point if previous compressed data has been damaged or if
+  random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
+  compression.
+
+    If deflate returns with avail_out == 0, this function must be called again
+  with the same value of the flush parameter and more output space (updated
+  avail_out), until the flush is complete (deflate returns with non-zero
+  avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+  avail_out is greater than six to avoid repeated flush markers due to
+  avail_out == 0 on return.
+
+    If the parameter flush is set to Z_FINISH, pending input is processed,
+  pending output is flushed and deflate returns with Z_STREAM_END if there
+  was enough output space; if deflate returns with Z_OK, this function must be
+  called again with Z_FINISH and more output space (updated avail_out) but no
+  more input data, until it returns with Z_STREAM_END or an error. After
+  deflate has returned Z_STREAM_END, the only possible operations on the
+  stream are deflateReset or deflateEnd.
+
+    Z_FINISH can be used immediately after deflateInit if all the compression
+  is to be done in a single step. In this case, avail_out must be at least
+  the value returned by deflateBound (see below). If deflate does not return
+  Z_STREAM_END, then it must be called again as described above.
+
+    deflate() sets strm->adler to the adler32 checksum of all input read
+  so far (that is, total_in bytes).
+
+    deflate() may update strm->data_type if it can make a good guess about
+  the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
+  binary. This field is only for information purposes and does not affect
+  the compression algorithm in any manner.
+
+    deflate() returns Z_OK if some progress has been made (more input
+  processed or more output produced), Z_STREAM_END if all input has been
+  consumed and all output has been produced (only when flush is set to
+  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+  if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
+  (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
+  fatal, and deflate() can be called again with more input and more output
+  space to continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any
+   pending output.
+
+     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+   prematurely (some input or output was discarded). In the error case,
+   msg may be set but then points to a static string (which must not be
+   deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+     Initializes the internal stream state for decompression. The fields
+   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+   the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
+   value depends on the compression method), inflateInit determines the
+   compression method from the zlib header and allocates all data structures
+   accordingly; otherwise the allocation will be deferred to the first call of
+   inflate.  If zalloc and zfree are set to Z_NULL, inflateInit updates them to
+   use default allocation functions.
+
+     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller.  msg is set to null if there is no error
+   message. inflateInit does not perform any decompression apart from reading
+   the zlib header if present: this will be done by inflate().  (So next_in and
+   avail_in may be modified, but next_out and avail_out are unchanged.)
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+    inflate decompresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full. It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+  The detailed semantics are as follows. inflate performs one or both of the
+  following actions:
+
+  - Decompress more input starting at next_in and update next_in and avail_in
+    accordingly. If not all input can be processed (because there is not
+    enough room in the output buffer), next_in is updated and processing
+    will resume at this point for the next call of inflate().
+
+  - Provide more output starting at next_out and update next_out and avail_out
+    accordingly.  inflate() provides as much output as possible, until there
+    is no more input data or no more space in the output buffer (see below
+    about the flush parameter).
+
+  Before the call of inflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming
+  more output, and updating the next_* and avail_* values accordingly.
+  The application can consume the uncompressed output when it wants, for
+  example when the output buffer is full (avail_out == 0), or after each
+  call of inflate(). If inflate returns Z_OK and with zero avail_out, it
+  must be called again after making room in the output buffer because there
+  might be more output pending.
+
+    The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH,
+  Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much
+  output as possible to the output buffer. Z_BLOCK requests that inflate() stop
+  if and when it gets to the next deflate block boundary. When decoding the
+  zlib or gzip format, this will cause inflate() to return immediately after
+  the header and before the first block. When doing a raw inflate, inflate()
+  will go ahead and process the first block, and will return when it gets to
+  the end of that block, or when it runs out of data.
+
+    The Z_BLOCK option assists in appending to or combining deflate streams.
+  Also to assist in this, on return inflate() will set strm->data_type to the
+  number of unused bits in the last byte taken from strm->next_in, plus 64
+  if inflate() is currently decoding the last block in the deflate stream,
+  plus 128 if inflate() returned immediately after decoding an end-of-block
+  code or decoding the complete header up to just before the first byte of the
+  deflate stream. The end-of-block will not be indicated until all of the
+  uncompressed data from that block has been written to strm->next_out.  The
+  number of unused bits may in general be greater than seven, except when
+  bit 7 of data_type is set, in which case the number of unused bits will be
+  less than eight.
+
+    inflate() should normally be called until it returns Z_STREAM_END or an
+  error. However if all decompression is to be performed in a single step
+  (a single call of inflate), the parameter flush should be set to
+  Z_FINISH. In this case all pending input is processed and all pending
+  output is flushed; avail_out must be large enough to hold all the
+  uncompressed data. (The size of the uncompressed data may have been saved
+  by the compressor for this purpose.) The next operation on this stream must
+  be inflateEnd to deallocate the decompression state. The use of Z_FINISH
+  is never required, but can be used to inform inflate that a faster approach
+  may be used for the single inflate() call.
+
+     In this implementation, inflate() always flushes as much output as
+  possible to the output buffer, and always uses the faster approach on the
+  first call. So the only effect of the flush parameter in this implementation
+  is on the return value of inflate(), as noted below, or when it returns early
+  because Z_BLOCK is used.
+
+     If a preset dictionary is needed after this call (see inflateSetDictionary
+  below), inflate sets strm->adler to the adler32 checksum of the dictionary
+  chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+  strm->adler to the adler32 checksum of all output produced so far (that is,
+  total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+  below. At the end of the stream, inflate() checks that its computed adler32
+  checksum is equal to that saved by the compressor and returns Z_STREAM_END
+  only if the checksum is correct.
+
+    inflate() will decompress and check either zlib-wrapped or gzip-wrapped
+  deflate data.  The header type is detected automatically.  Any information
+  contained in the gzip header is not retained, so applications that need that
+  information should instead use raw inflate, see inflateInit2() below, or
+  inflateBack() and perform their own processing of the gzip header and
+  trailer.
+
+    inflate() returns Z_OK if some progress has been made (more input processed
+  or more output produced), Z_STREAM_END if the end of the compressed data has
+  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+  corrupted (input stream not conforming to the zlib format or incorrect check
+  value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+  if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory,
+  Z_BUF_ERROR if no progress is possible or if there was not enough room in the
+  output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
+  inflate() can be called again with more input and more output space to
+  continue decompressing. If Z_DATA_ERROR is returned, the application may then
+  call inflateSync() to look for a good compression block if a partial recovery
+  of the data is desired.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any
+   pending output.
+
+     inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+   was inconsistent. In the error case, msg may be set but then points to a
+   static string (which must not be deallocated).
+*/
+
+                        /* Advanced functions */
+
+/*
+    The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+                                     int  level,
+                                     int  method,
+                                     int  windowBits,
+                                     int  memLevel,
+                                     int  strategy));
+
+     This is another version of deflateInit with more compression options. The
+   fields next_in, zalloc, zfree and opaque must be initialized before by
+   the caller.
+
+     The method parameter is the compression method. It must be Z_DEFLATED in
+   this version of the library.
+
+     The windowBits parameter is the base two logarithm of the window size
+   (the size of the history buffer). It should be in the range 8..15 for this
+   version of the library. Larger values of this parameter result in better
+   compression at the expense of memory usage. The default value is 15 if
+   deflateInit is used instead.
+
+     windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
+   determines the window size. deflate() will then generate raw deflate data
+   with no zlib header or trailer, and will not compute an adler32 check value.
+
+     windowBits can also be greater than 15 for optional gzip encoding. Add
+   16 to windowBits to write a simple gzip header and trailer around the
+   compressed data instead of a zlib wrapper. The gzip header will have no
+   file name, no extra data, no comment, no modification time (set to zero),
+   no header crc, and the operating system will be set to 255 (unknown).  If a
+   gzip stream is being written, strm->adler is a crc32 instead of an adler32.
+
+     The memLevel parameter specifies how much memory should be allocated
+   for the internal compression state. memLevel=1 uses minimum memory but
+   is slow and reduces compression ratio; memLevel=9 uses maximum memory
+   for optimal speed. The default value is 8. See zconf.h for total memory
+   usage as a function of windowBits and memLevel.
+
+     The strategy parameter is used to tune the compression algorithm. Use the
+   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+   filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+   string match), or Z_RLE to limit match distances to one (run-length
+   encoding). Filtered data consists mostly of small values with a somewhat
+   random distribution. In this case, the compression algorithm is tuned to
+   compress them better. The effect of Z_FILTERED is to force more Huffman
+   coding and less string matching; it is somewhat intermediate between
+   Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as
+   Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy
+   parameter only affects the compression ratio but not the correctness of the
+   compressed output even if it is not set appropriately.  Z_FIXED prevents the
+   use of dynamic Huffman codes, allowing for a simpler decoder for special
+   applications.
+
+      deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
+   method). msg is set to null if there is no error message.  deflateInit2 does
+   not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the compression dictionary from the given byte sequence
+   without producing any compressed output. This function must be called
+   immediately after deflateInit, deflateInit2 or deflateReset, before any
+   call of deflate. The compressor and decompressor must use exactly the same
+   dictionary (see inflateSetDictionary).
+
+     The dictionary should consist of strings (byte sequences) that are likely
+   to be encountered later in the data to be compressed, with the most commonly
+   used strings preferably put towards the end of the dictionary. Using a
+   dictionary is most useful when the data to be compressed is short and can be
+   predicted with good accuracy; the data can then be compressed better than
+   with the default empty dictionary.
+
+     Depending on the size of the compression data structures selected by
+   deflateInit or deflateInit2, a part of the dictionary may in effect be
+   discarded, for example if the dictionary is larger than the window size in
+   deflateInit or deflateInit2. Thus the strings most likely to be useful should be
+   put at the end of the dictionary, not at the front. In addition, the
+   current implementation of deflate will use at most the window size minus
+   262 bytes of the provided dictionary.
+
+     Upon return of this function, strm->adler is set to the adler32 value
+   of the dictionary; the decompressor may later use this value to determine
+   which dictionary has been used by the compressor. (The adler32 value
+   applies to the whole dictionary even if only a subset of the dictionary is
+   actually used by the compressor.) If a raw deflate was requested, then the
+   adler32 value is not computed and strm->adler is not set.
+
+     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+   parameter is invalid (such as NULL dictionary) or the stream state is
+   inconsistent (for example if deflate has already been called for this stream
+   or if the compression method is bsort). deflateSetDictionary does not
+   perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when several compression strategies will be
+   tried, for example when there are several ways of pre-processing the input
+   data with a filter. The streams that will be discarded should then be freed
+   by calling deflateEnd.  Note that deflateCopy duplicates the internal
+   compression state which can be quite large, so this strategy is slow and
+   can consume lots of memory.
+
+     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being NULL). msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to deflateEnd followed by deflateInit,
+   but does not free and reallocate all the internal compression state.
+   The stream will keep the same compression level and any other attributes
+   that may have been set by deflateInit2.
+
+      deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+                                      int level,
+                                      int strategy));
+/*
+     Dynamically update the compression level and compression strategy.  The
+   interpretation of level and strategy is as in deflateInit2.  This can be
+   used to switch between compression and straight copy of the input data, or
+   to switch to a different kind of input data requiring a different
+   strategy. If the compression level is changed, the input available so far
+   is compressed with the old level (and may be flushed); the new level will
+   take effect only at the next call of deflate().
+
+     Before the call of deflateParams, the stream state must be set as for
+   a call of deflate(), since the currently available input may have to
+   be compressed and flushed. In particular, strm->avail_out must be non-zero.
+
+     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
+   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
+   if strm->avail_out was zero.
+*/
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+                                    int good_length,
+                                    int max_lazy,
+                                    int nice_length,
+                                    int max_chain));
+/*
+     Fine tune deflate's internal compression parameters.  This should only be
+   used by someone who understands the algorithm used by zlib's deflate for
+   searching for the best matching string, and even then only by the most
+   fanatic optimizer trying to squeeze out the last compressed bit for their
+   specific input data.  Read the deflate.c source code for the meaning of the
+   max_lazy, good_length, nice_length, and max_chain parameters.
+
+     deflateTune() can be called after deflateInit() or deflateInit2(), and
+   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+                                       uLong sourceLen));
+/*
+     deflateBound() returns an upper bound on the compressed size after
+   deflation of sourceLen bytes.  It must be called after deflateInit()
+   or deflateInit2().  This would be used to allocate an output buffer
+   for deflation in a single pass, and so would be called before deflate().
+*/
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     deflatePrime() inserts bits in the deflate output stream.  The intent
+  is that this function is used to start off the deflate output with the
+  bits leftover from a previous deflate stream when appending to it.  As such,
+  this function can only be used for raw deflate, and must be used before the
+  first deflate() call after a deflateInit2() or deflateReset().  bits must be
+  less than or equal to 16, and that many of the least significant bits of
+  value will be inserted in the output.
+
+      deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+      deflateSetHeader() provides gzip header information for when a gzip
+   stream is requested by deflateInit2().  deflateSetHeader() may be called
+   after deflateInit2() or deflateReset() and before the first call of
+   deflate().  The text, time, os, extra field, name, and comment information
+   in the provided gz_header structure are written to the gzip header (xflag is
+   ignored -- the extra flags are set according to the compression level).  The
+   caller must assure that, if not Z_NULL, name and comment are terminated with
+   a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+   available there.  If hcrc is true, a gzip header crc is included.  Note that
+   the current versions of the command-line version of gzip (up through version
+   1.3.x) do not support header crc's, and will report that it is a "multi-part
+   gzip file" and give up.
+
+      If deflateSetHeader is not used, the default gzip header has text false,
+   the time set to zero, and os set to 255, with no extra, name, or comment
+   fields.  The gzip header is returned to the default state by deflateReset().
+
+      deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+                                     int  windowBits));
+
+     This is another version of inflateInit with an extra parameter. The
+   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+   before by the caller.
+
+     The windowBits parameter is the base two logarithm of the maximum window
+   size (the size of the history buffer).  It should be in the range 8..15 for
+   this version of the library. The default value is 15 if inflateInit is used
+   instead. windowBits must be greater than or equal to the windowBits value
+   provided to deflateInit2() while compressing, or it must be equal to 15 if
+   deflateInit2() was not used. If a compressed stream with a larger window
+   size is given as input, inflate() will return with the error code
+   Z_DATA_ERROR instead of trying to allocate a larger window.
+
+     windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
+   determines the window size. inflate() will then process raw deflate data,
+   not looking for a zlib or gzip header, not generating a check value, and not
+   looking for any check values for comparison at the end of the stream. This
+   is for use with other formats that use the deflate compressed data format
+   such as zip.  Those formats provide their own check values. If a custom
+   format is developed using the raw deflate format for compressed data, it is
+   recommended that a check value such as an adler32 or a crc32 be applied to
+   the uncompressed data as is done in the zlib, gzip, and zip formats.  For
+   most applications, the zlib format should be used as is. Note that comments
+   above on the use in deflateInit2() apply to the magnitude of windowBits.
+
+     windowBits can also be greater than 15 for optional gzip decoding. Add
+   32 to windowBits to enable zlib and gzip decoding with automatic header
+   detection, or add 16 to decode only the gzip format (the zlib format will
+   return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is
+   a crc32 instead of an adler32.
+
+     inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg
+   is set to null if there is no error message.  inflateInit2 does not perform
+   any decompression apart from reading the zlib header if present: this will
+   be done by inflate(). (So next_in and avail_in may be modified, but next_out
+   and avail_out are unchanged.)
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the decompression dictionary from the given uncompressed byte
+   sequence. This function must be called immediately after a call of inflate,
+   if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
+   can be determined from the adler32 value returned by that call of inflate.
+   The compressor and decompressor must use exactly the same dictionary (see
+   deflateSetDictionary).  For raw inflate, this function can be called
+   immediately after inflateInit2() or inflateReset() and before any call of
+   inflate() to set the dictionary.  The application must insure that the
+   dictionary that was used for compression is provided.
+
+     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+   parameter is invalid (such as NULL dictionary) or the stream state is
+   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+   expected one (incorrect adler32 value). inflateSetDictionary does not
+   perform any decompression: this will be done by subsequent calls of
+   inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/*
+    Skips invalid compressed data until a full flush point (see above the
+  description of deflate with Z_FULL_FLUSH) can be found, or until all
+  available input is skipped. No output is provided.
+
+    inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
+  if no more input was provided, Z_DATA_ERROR if no flush point has been found,
+  or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
+  case, the application may save the current value of total_in which
+  indicates where valid compressed data was found. In the error case, the
+  application may repeatedly call inflateSync, providing more input each time,
+  until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when randomly accessing a large stream.  The
+   first pass through the stream can periodically record the inflate state,
+   allowing restarting inflate at those points when randomly accessing the
+   stream.
+
+     inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being NULL). msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to inflateEnd followed by inflateInit,
+   but does not free and reallocate all the internal decompression state.
+   The stream will keep attributes that may have been set by inflateInit2.
+
+      inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     This function inserts bits in the inflate input stream.  The intent is
+  that this function is used to start inflating at a bit position in the
+  middle of a byte.  The provided bits will be used before any bytes are used
+  from next_in.  This function should only be used with raw inflate, and
+  should be used before the first inflate() call after inflateInit2() or
+  inflateReset().  bits must be less than or equal to 16, and that many of the
+  least significant bits of value will be inserted in the input.
+
+      inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+      inflateGetHeader() requests that gzip header information be stored in the
+   provided gz_header structure.  inflateGetHeader() may be called after
+   inflateInit2() or inflateReset(), and before the first call of inflate().
+   As inflate() processes the gzip stream, head->done is zero until the header
+   is completed, at which time head->done is set to one.  If a zlib stream is
+   being decoded, then head->done is set to -1 to indicate that there will be
+   no gzip header information forthcoming.  Note that Z_BLOCK can be used to
+   force inflate() to return immediately after header processing is complete
+   and before any actual data is decompressed.
+
+      The text, time, xflags, and os fields are filled in with the gzip header
+   contents.  hcrc is set to true if there is a header CRC.  (The header CRC
+   was valid if done is set to one.)  If extra is not Z_NULL, then extra_max
+   contains the maximum number of bytes to write to extra.  Once done is true,
+   extra_len contains the actual extra field length, and extra contains the
+   extra field, or that field truncated if extra_max is less than extra_len.
+   If name is not Z_NULL, then up to name_max characters are written there,
+   terminated with a zero unless the length is greater than name_max.  If
+   comment is not Z_NULL, then up to comm_max characters are written there,
+   terminated with a zero unless the length is greater than comm_max.  When
+   any of extra, name, or comment are not Z_NULL and the respective field is
+   not present in the header, then that field is set to Z_NULL to signal its
+   absence.  This allows the use of deflateSetHeader() with the returned
+   structure to duplicate the header.  However if those fields are set to
+   allocated memory, then the application will need to save those pointers
+   elsewhere so that they can be eventually freed.
+
+      If inflateGetHeader is not used, then the header information is simply
+   discarded.  The header is always checked for validity, including the header
+   CRC if present.  inflateReset() will reset the process to discard the header
+   information.  The application would need to call inflateGetHeader() again to
+   retrieve the header from the next gzip stream.
+
+      inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
+                                        unsigned char FAR *window));
+
+     Initialize the internal stream state for decompression using inflateBack()
+   calls.  The fields zalloc, zfree and opaque in strm must be initialized
+   before the call.  If zalloc and zfree are Z_NULL, then the default library-
+   derived memory allocation routines are used.  windowBits is the base two
+   logarithm of the window size, in the range 8..15.  window is a caller
+   supplied buffer of that size.  Except for special applications where it is
+   assured that deflate was used with small window sizes, windowBits must be 15
+   and a 32K byte window must be supplied to be able to decompress general
+   deflate streams.
+
+     See inflateBack() for the usage of these routines.
+
+     inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+   the parameters are invalid, Z_MEM_ERROR if the internal state could not
+   be allocated, or Z_VERSION_ERROR if the version of the library does not
+   match the version of the header file.
+*/
+
+typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); /* inflateBack() input callback, called as in(in_desc, &buf) */
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); /* inflateBack() output callback, called as out(out_desc, buf, len) */
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+                                    in_func in, void FAR *in_desc,
+                                    out_func out, void FAR *out_desc));
+/*
+     inflateBack() does a raw inflate with a single call using a call-back
+   interface for input and output.  This is more efficient than inflate() for
+   file i/o applications in that it avoids copying between the output and the
+   sliding window by simply making the window itself the output buffer.  This
+   function trusts the application to not change the output buffer passed by
+   the output function, at least until inflateBack() returns.
+
+     inflateBackInit() must be called first to allocate the internal state
+   and to initialize the state with the user-provided window buffer.
+   inflateBack() may then be used multiple times to inflate a complete, raw
+   deflate stream with each call.  inflateBackEnd() is then called to free
+   the allocated state.
+
+     A raw deflate stream is one with no zlib or gzip header or trailer.
+   This routine would normally be used in a utility that reads zip or gzip
+   files and writes out uncompressed files.  The utility would decode the
+   header and process the trailer on its own, hence this routine expects
+   only the raw deflate stream to decompress.  This is different from the
+   normal behavior of inflate(), which expects either a zlib or gzip header and
+   trailer around the deflate stream.
+
+     inflateBack() uses two subroutines supplied by the caller that are then
+   called by inflateBack() for input and output.  inflateBack() calls those
+   routines until it reads a complete deflate stream and writes out all of the
+   uncompressed data, or until it encounters an error.  The function's
+   parameters and return types are defined above in the in_func and out_func
+   typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
+   number of bytes of provided input, and a pointer to that input in buf.  If
+   there is no input available, in() must return zero--buf is ignored in that
+   case--and inflateBack() will return a buffer error.  inflateBack() will call
+   out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].  out()
+   should return zero on success, or non-zero on failure.  If out() returns
+   non-zero, inflateBack() will return with an error.  Neither in() nor out()
+   are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size.  Any non-zero
+   amount of input may be provided by in().
+
+     For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
+   in() will be called.  Therefore strm->next_in must be initialized before
+   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
+   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 .. strm->avail_in - 1].
+
+     The in_desc and out_desc parameters of inflateBack() are passed as the
+   first parameter of in() and out() respectively when they are called.  These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+     On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call.  The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format
+   error in the deflate stream (in which case strm->msg is set to indicate the
+   nature of the error), or Z_STREAM_ERROR if the stream was not properly
+   initialized.  In the case of Z_BUF_ERROR, an input or output error can be
+   distinguished using strm->next_in which will be Z_NULL only if in() returned
+   an error.  If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to
+   out() returning non-zero.  (in() will always be called before out(), so
+   strm->next_in is assured to be defined if out() returns non-zero.)  Note
+   that inflateBack() cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+     All memory allocated by inflateBackInit() is freed.
+
+     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+
+    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+     1.0: size of uInt
+     3.2: size of uLong
+     5.4: size of voidpf (pointer)
+     7.6: size of z_off_t
+
+    Compiler, assembler, and debug options:
+     8: DEBUG
+     9: ASMV or ASMINF -- use ASM code
+     10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+     11: 0 (reserved)
+
+    One-time table building (smaller code, but not thread-safe if true):
+     12: BUILDFIXED -- build static block decoding tables when needed
+     13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+     14,15: 0 (reserved)
+
+    Library content (indicates missing functionality):
+     16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+                          deflate code when not needed)
+     17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+                    and decode gzip streams (to avoid linking crc code)
+     18-19: 0 (reserved)
+
+    Operation variations (changes in library functionality):
+     20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+     21: FASTEST -- deflate algorithm with only one, lowest compression level
+     22,23: 0 (reserved)
+
+    The sprintf variant used by gzprintf (zero is best):
+     24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+     25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+     26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+    Remainder:
+     27-31: 0 (reserved)
+ */
+
+
+                        /* utility functions */
+
+/*
+     The following utility functions are implemented on top of the
+   basic stream-oriented functions. To simplify the interface, some
+   default options are assumed (compression level and memory usage,
+   standard memory allocation functions). The source code of these
+   utility functions can easily be modified if you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
+                                 const Bytef *source, uLong sourceLen));
+/*
+     Compresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be at least the value returned
+   by compressBound(sourceLen). Upon exit, destLen is the actual size of the
+   compressed buffer.
+     This function can be used to compress a whole file at once if the
+   input file is mmap'ed.
+     compress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
+                                  const Bytef *source, uLong sourceLen,
+                                  int level));
+/*
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least the value returned by
+   compressBound(sourceLen). Upon exit, destLen is the actual size of the
+   compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+/*
+     compressBound() returns an upper bound on the compressed size after
+   compress() or compress2() on sourceLen bytes.  It would be used before
+   a compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
+                                   const Bytef *source, uLong sourceLen));
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be large enough to hold the
+   entire uncompressed data. (The size of the uncompressed data must have
+   been saved previously by the compressor and transmitted to the decompressor
+   by some mechanism outside the scope of this compression library.)
+   Upon exit, destLen is the actual size of the uncompressed data.
+     This function can be used to decompress a whole file at once if the
+   input file is mmap'ed.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.
+*/
+
+
+typedef voidp gzFile;
+
+ZEXTERN gzFile ZEXPORT gzopen  OF((const char *path, const char *mode));
+/*
+     Opens a gzip (.gz) file for reading or writing. The mode parameter
+   is as in fopen ("rb" or "wb") but can also include a compression level
+   ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
+   Huffman only compression as in "wb1h", or 'R' for run-length encoding
+   as in "wb1R". (See the description of deflateInit2 for more information
+   about the strategy parameter.)
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.
+
+     gzopen returns NULL if the file could not be opened or if there was
+   insufficient memory to allocate the (de)compression state; errno
+   can be checked to distinguish the two cases (if errno is zero, the
+   zlib error is Z_MEM_ERROR).  */
+
+ZEXTERN gzFile ZEXPORT gzdopen  OF((int fd, const char *mode));
+/*
+     gzdopen() associates a gzFile with the file descriptor fd.  File
+   descriptors are obtained from calls like open, dup, creat, pipe or
+   fileno (if the file has been previously opened with fopen).
+   The mode parameter is as in gzopen.
+     The next call of gzclose on the returned gzFile will also close the
+   file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
+   descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
+     gzdopen returns NULL if there was insufficient memory to allocate
+   the (de)compression state.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+     Dynamically update the compression level or strategy. See the description
+   of deflateInit2 for the meaning of these parameters.
+     gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
+   opened for writing.
+*/
+
+ZEXTERN int ZEXPORT    gzread  OF((gzFile file, voidp buf, unsigned len));
+/*
+     Reads the given number of uncompressed bytes from the compressed file.
+   If the input file was not in gzip format, gzread copies the given number
+   of bytes into the buffer.
+     gzread returns the number of uncompressed bytes actually read (0 for
+   end of file, -1 for error). */
+
+ZEXTERN int ZEXPORT    gzwrite OF((gzFile file,
+                                   voidpc buf, unsigned len));
+/*
+     Writes the given number of uncompressed bytes into the compressed file.
+   gzwrite returns the number of uncompressed bytes actually written
+   (0 in case of error).
+*/
+
+ZEXTERN int ZEXPORTVA   gzprintf OF((gzFile file, const char *format, ...));
+/*
+     Converts, formats, and writes the args to the compressed file under
+   control of the format string, as in fprintf. gzprintf returns the number of
+   uncompressed bytes actually written (0 in case of error).  The number of
+   uncompressed bytes written is limited to 4095. The caller should assure that
+   this limit is not exceeded. If it is exceeded, then gzprintf() will return
+   an error (0) with nothing written. In this case, there may also be a
+   buffer overflow with unpredictable consequences, which is possible only if
+   zlib was compiled with the insecure functions sprintf() or vsprintf()
+   because the secure snprintf() or vsnprintf() functions were not available.
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+      Writes the given null-terminated string to the compressed file, excluding
+   the terminating null character.
+      gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+      Reads bytes from the compressed file until len-1 characters are read, or
+   a newline character is read and transferred to buf, or an end-of-file
+   condition is encountered.  The string is then terminated with a null
+   character.
+      gzgets returns buf, or Z_NULL in case of error.
+*/
+
+ZEXTERN int ZEXPORT    gzputc OF((gzFile file, int c));
+/*
+      Writes c, converted to an unsigned char, into the compressed file.
+   gzputc returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT    gzgetc OF((gzFile file));
+/*
+      Reads one byte from the compressed file. gzgetc returns this byte
+   or -1 in case of end of file or error.
+*/
+
+ZEXTERN int ZEXPORT    gzungetc OF((int c, gzFile file));
+/*
+      Push one character back onto the stream to be read again later.
+   Only one character of push-back is allowed.  gzungetc() returns the
+   character pushed, or -1 on failure.  gzungetc() will fail if a
+   character has been pushed but not read yet, or if c is -1. The pushed
+   character will be discarded if the stream is repositioned with gzseek()
+   or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT    gzflush OF((gzFile file, int flush));
+/*
+     Flushes all pending output into the compressed file. The parameter
+   flush is as in the deflate() function. The return value is the zlib
+   error number (see function gzerror below). gzflush returns Z_OK if
+   the flush parameter is Z_FINISH and all output could be flushed.
+     gzflush should be called only when strictly necessary because it can
+   degrade compression.
+*/
+
+ZEXTERN z_off_t ZEXPORT    gzseek OF((gzFile file,
+                                      z_off_t offset, int whence));
+/*
+      Sets the starting position for the next gzread or gzwrite on the
+   given compressed file. The offset represents a number of bytes in the
+   uncompressed data stream. The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+     If the file is opened for reading, this function is emulated but can be
+   extremely slow. If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+      gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+/*
+     Rewinds the given file. This function is supported only for reading.
+
+   gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+/*
+     Returns the starting position for the next gzread or gzwrite on the
+   given compressed file. This position represents a number of bytes in the
+   uncompressed data stream.
+
+   gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+     Returns 1 when EOF has previously been detected reading the given
+   input stream, otherwise zero.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+     Returns 1 if file is being read directly without decompression, otherwise
+   zero.
+*/
+
+ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+/*
+     Flushes all pending output if necessary, closes the compressed file
+   and deallocates all the (de)compression state. The return value is the zlib
+   error number (see function gzerror below).
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+     Returns the error message for the last error which occurred on the
+   given compressed file. errnum is set to zlib error number. If an
+   error occurred in the file system and not in the compression library,
+   errnum is set to Z_ERRNO and the application may consult errno
+   to get the exact error code.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+     Clears the error and end-of-file flags for file. This is analogous to the
+   clearerr() function in stdio. This is useful for continuing to read a gzip
+   file that is being written concurrently.
+*/
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the
+   compression library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum. If buf is NULL, this function returns
+   the required initial value for the checksum.
+   An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+   much faster. Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
+                                          z_off_t len2));
+/*
+     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
+   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
+   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.
+*/
+
+ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
+/*
+     Update a running CRC-32 with the bytes buf[0..len-1] and return the
+   updated CRC-32. If buf is NULL, this function returns the required initial
+   value for the crc. Pre- and post-conditioning (one's complement) is
+   performed within this function so it shouldn't be done by the application.
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+
+/*
+     Combine two CRC-32 check values into one.  For two sequences of bytes,
+   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
+   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+   len2.
+*/
+
+
+                        /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
+                                      int windowBits, int memLevel,
+                                      int strategy, const char *version,
+                                      int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
+                                      const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+                                         unsigned char FAR *window,
+                                         const char *version,
+                                         int stream_size));
+#define deflateInit(strm, level) \
+        deflateInit_((strm), (level),       ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit(strm) \
+        inflateInit_((strm),                ZLIB_VERSION, sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+        deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                      (strategy),           ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+        inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
+#define inflateBackInit(strm, windowBits, window) \
+        inflateBackInit_((strm), (windowBits), (window), \
+        ZLIB_VERSION, sizeof(z_stream))
+
+
+#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
+    struct internal_state {int dummy;}; /* hack for buggy compilers */
+#endif
+
+ZEXTERN const char   * ZEXPORT zError           OF((int));
+ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp z));
+ZEXTERN const uLongf * ZEXPORT get_crc_table    OF((void));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
diff --git a/GLideNHQ/lib/dxtn.lib b/GLideNHQ/lib/dxtn.lib
new file mode 100644
index 00000000..960604a4
Binary files /dev/null and b/GLideNHQ/lib/dxtn.lib differ
diff --git a/GLideNHQ/lib/dxtnd.lib b/GLideNHQ/lib/dxtnd.lib
new file mode 100644
index 00000000..5234791f
Binary files /dev/null and b/GLideNHQ/lib/dxtnd.lib differ
diff --git a/GLideNHQ/lib/libpng.lib b/GLideNHQ/lib/libpng.lib
new file mode 100644
index 00000000..fe82d31d
Binary files /dev/null and b/GLideNHQ/lib/libpng.lib differ
diff --git a/GLideNHQ/lib/libpngd.lib b/GLideNHQ/lib/libpngd.lib
new file mode 100644
index 00000000..4cfdfc61
Binary files /dev/null and b/GLideNHQ/lib/libpngd.lib differ
diff --git a/GLideNHQ/lib/zlib.lib b/GLideNHQ/lib/zlib.lib
new file mode 100644
index 00000000..ecb973d8
Binary files /dev/null and b/GLideNHQ/lib/zlib.lib differ
diff --git a/GLideNHQ/lib/zlibd.lib b/GLideNHQ/lib/zlibd.lib
new file mode 100644
index 00000000..7eaf61b8
Binary files /dev/null and b/GLideNHQ/lib/zlibd.lib differ
diff --git a/GLideNHQ/tc-1.1+/Makefile.gcc b/GLideNHQ/tc-1.1+/Makefile.gcc
new file mode 100644
index 00000000..9e270b60
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/Makefile.gcc
@@ -0,0 +1,94 @@
+# Texture compression Linux makefile
+# Version:  1.1+
+#
+# Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+#
+# Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+#    Added support for RGBA input, DXT3,5 workaround for ATI Radeons,
+#    and _mesa_upscale_teximage2d speedup.
+#
+# this is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# this is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Make; see the file COPYING.  If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+
+
+#
+#  Available options:
+#
+#    Environment variables:
+#       ARGB=1          enable support for ARGB inputs
+#                       default = no
+#       RADEON=1        enable DXT3,5 workaround for ATI Radeon
+#                       default = no
+#       YUV=1           use YUV to determine representative color
+#                       default = no
+#
+#    Targets:
+#	all:		build static library (libdxtn.a)
+#	clean:		remove object files
+#	realclean:	remove all generated files
+#
+
+
+.PHONY: all clean realclean
+
+DLLNAME = libdxtn.a
+
+CC = gcc
+CFLAGS = -Wall -W -pedantic -ansi -Wno-long-long
+CFLAGS += -O3 -ffast-math -funroll-loops
+#CFLAGS += -fomit-frame-pointer -fexpensive-optimizations
+CFLAGS += -fPIC -DPIC
+AR = ar
+RANLIB = ranlib
+
+ifdef ARGB
+CFLAGS += -DARGB
+endif
+
+ifdef RADEON
+CFLAGS += -DRADEON
+endif
+
+ifdef YUV
+CFLAGS += -DYUV
+endif
+
+LD = gcc
+LDFLAGS = -static
+LDLIBS =
+
+SOURCES = \
+	fxt1.c \
+	dxtn.c \
+	wrapper.c \
+	texstore.c
+
+OBJECTS = $(SOURCES:.c=.o)
+
+.c.o:
+	$(CC) -o $@ $(CFLAGS) -c $<
+
+all: $(DLLNAME)
+
+$(DLLNAME): $(OBJECTS)
+	$(AR) rc $@ $(OBJECTS)
+	$(RANLIB) $@
+
+clean:
+	-$(RM) $(OBJECTS)
+
+realclean: clean
+	-$(RM) $(DLLNAME)
+
+-include depend
diff --git a/GLideNHQ/tc-1.1+/Makefile.vc8 b/GLideNHQ/tc-1.1+/Makefile.vc8
new file mode 100644
index 00000000..a78fd694
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/Makefile.vc8
@@ -0,0 +1,143 @@
+# Texture compression Win32 makefile
+# Version:  1.1+
+#
+# Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+#
+# Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+#    Added support for RGBA input, DXT3,5 workaround for ATI Radeons,
+#    and _mesa_upscale_teximage2d speedup.
+#
+# this is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# this is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Make; see the file COPYING.  If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+#
+#
+# This makefile MUST be processed by GNU make!!!
+#
+#  Available options:
+#
+#    Environment variables:
+#	DEBUG=1		enable debugging checks and messages
+#			default = no
+#       DLL=1           build dll
+#                       default = no
+#       ARGB=1          enable support for ARGB inputs
+#                       default = no
+#       RADEON=1        enable DXT3,5 workaround for ATI Radeon
+#                       default = no
+#       YUV=1           use YUV to determine representative color
+#                       default = no
+#
+#    Targets:
+#	all:		build everything
+#	clean:		remove object files
+#	realclean:	remove all generated files
+#
+
+.PHONY: all clean realclean
+.SUFFIXES: .c .obj
+
+ifdef DLL
+DLLNAME = dxtn.dll
+else
+ifdef DEBUG
+DLLNAME = dxtnd.lib
+else
+DLLNAME = dxtn.lib
+endif
+endif
+
+CC = cl
+AS = nasm
+ifdef DLL
+LD = _link
+else
+LD = lib
+endif
+RC = rc
+
+#ifeq ($(wildcard $(addsuffix /rm.exe,$(subst ;, ,$(PATH)))),)
+#UNLINK = del $(subst /,\,$(1))
+#else
+UNLINK = $(RM) $(1)
+#endif
+
+LDFLAGS = -nologo -machine:X86 -nodefaultlib
+
+ifdef DLL
+LDFLAGS += -dll -opt:WIN98
+endif
+
+ASFLAGS = -O6 -fwin32 -D__WIN32__ --prefix _
+ASFLAGS += $(CDEFS)
+
+CFLAGS  = -nologo -W3 -WX -D__MSC__=1 -D_CRT_SECURE_NO_DEPRECATE
+
+ifdef ARGB
+CFLAGS += -DARGB
+endif
+
+ifdef RADEON
+CFLAGS += -DRADEON
+endif
+
+ifdef YUV
+CFLAGS += -DYUV
+endif
+
+ifdef DLL
+CFLAGS += -D__WIN32__
+LDLIBS = user32.lib kernel32.lib
+endif
+
+ifdef DEBUG
+CFLAGS += -MTd -Zi -DDEBUG
+#LDFLAGS += -debug
+ifdef DLL
+LDLIBS += LIBCMTD.lib
+endif
+OPTFLAGS ?= -Od
+else
+CFLAGS += -DNDEBUG -MT -GL
+LDFLAGS += -ltcg:STATUS
+ifdef DLL
+LDLIBS += LIBCMT.lib
+endif
+OPTFLAGS ?= -O2
+endif
+
+CFLAGS += -I.
+CFLAGS += $(CDEFS) $(OPTFLAGS)
+
+SOURCE = \
+	fxt1.c \
+	dxtn.c \
+	wrapper.c \
+	texstore.c
+
+OBJECTS = $(SOURCE:.c=.obj)
+
+.c.obj:
+	$(CC) -Fo$@ $(CFLAGS) -c $<
+
+all: $(DLLNAME)
+
+$(DLLNAME): $(OBJECTS)
+	$(LD) -out:$@ $(LDFLAGS) $^ $(LDLIBS)
+
+clean:
+	-$(RM) *.obj
+
+realclean: clean
+	-$(RM) $(DLLNAME)
+	-$(RM) *.pdb
diff --git a/GLideNHQ/tc-1.1+/dxtn.c b/GLideNHQ/tc-1.1+/dxtn.c
new file mode 100644
index 00000000..e2d335ae
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/dxtn.c
@@ -0,0 +1,884 @@
+/*
+ * DXTn codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Added support for ARGB inputs, DXT3,5 workaround for ATI Radeons, and
+ * YUV conversions to determine representative colors.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <stdio.h>
+
+#include "types.h"
+#include "internal.h"
+#include "dxtn.h"
+
+
+/***************************************************************************\
+ * DXTn encoder
+ *
+ * The encoder was built by reversing the decoder,
+ * and is vaguely based on FXT1 codec. Note that this code
+ * is merely a proof of concept, since it is highly UNoptimized!
+\***************************************************************************/
+
+
+#define MAX_COMP 4 /* ever needed maximum number of components in texel */
+#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
+#define N_TEXELS 16 /* number of texels in a block (always 16) */
+#define COLOR565(v) (word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3))
+
+
+/*
+ * Translation from an interpolation index (as produced by CALCCDOT in the
+ * quantizers below) to the 2-bit DXTn color selector stored in the block.
+ * Row 0 is used when all four selectors are colors: selector 1 is color1,
+ * selectors 2 and 3 are the two in-between colors.
+ * Row 1 is used when selector 3 is reserved (black / transparent): only one
+ * in-between color (selector 2) exists and index 2 maps to color1.
+ */
+static const int dxtn_color_tlat[2][4] = {
+    { 0, 2, 3, 1 },
+    { 0, 2, 1, 3 }
+};
+
+/*
+ * Same idea for the 3-bit DXT5 alpha selector.
+ * Row 0: eight interpolated alphas, selector 1 is alpha1 (index 7).
+ * Row 1: six interpolated alphas, selector 1 is alpha1 (index 5); selectors
+ * 6 and 7 are kept for the explicit 0 and 255 values.
+ */
+static const int dxtn_alpha_tlat[2][8] = {
+    { 0, 2, 3, 4, 5, 6, 7, 1 },
+    { 0, 2, 3, 4, 5, 1, 6, 7 }
+};
+
+
+/*
+ * Quantize one 4x4 tile into an opaque DXT1 block.
+ *
+ *   cc     out: cc[0] receives color0 | (color1 << 16), both RGB565;
+ *          cc[1] receives the sixteen 2-bit selectors, texel 0 in the
+ *          least significant bits.
+ *   lines  in: one pointer per tile row; each pointer is advanced past the
+ *          texels that are read.
+ *   comps  number of source components per texel.
+ *
+ * Alpha is never consulted.  A texel whose components are all zero is
+ * treated as "black" and may be stored with the reserved selector 3.
+ */
+static void
+dxt1_rgb_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];   /* interpolation vector */
+
+    dword hi; /* high doubleword */
+    int color0, color1;
+    int n_vect;
+    const int n_comp = 3;
+    int black = 0;
+
+#ifndef YUV
+    int minSum = 2000; /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1; /* small enough */
+    int minCol = 0; /* phoudoin: silent compiler! */
+    int maxCol = 0; /* phoudoin: silent compiler! */
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    /* make the whole block opaque */
+    /* we will NEVER reference ACOMP of any pixel */
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    input[k + l * 4][2] = *lines[l]++;
+	    input[k + l * 4][1] = *lines[l]++;
+	    input[k + l * 4][0] = *lines[l]++;
+	    if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+	/* RGB to YUV conversion according to CCIR 601 specs
+	 * Y = 0.299R+0.587G+0.114B
+	 * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
+	 * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
+	 */
+	sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+	if (sum == 0) {
+	    black = 1;
+	}
+    }
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+    if (color0 == color1) {
+	/* we'll use 3-vector */
+	/* shift as dword: color1 may be >= 0x8000, and shifting that by 16
+	 * in a signed int is undefined behaviour */
+	cc[0] = (dword)color0 | ((dword)color1 << 16);
+	/* all selectors 3 (black) or all selectors 0 (color0) */
+	hi = black ? -1 : 0;
+    } else {
+	if (black && ((color0 == 0) || (color1 == 0))) {
+	    /* we still can use 4-vector */
+	    black = 0;
+	}
+
+	/* Order the endpoints so that the decoder picks the wanted mode:
+	 * color0 > color1 when every selector is a color, color0 <= color1
+	 * when selector 3 must stay reserved for black.
+	 */
+	if (black ^ (color0 <= color1)) {
+	    int aux;
+	    aux = color0;
+	    color0 = color1;
+	    color1 = aux;
+	    aux = minCol;
+	    minCol = maxCol;
+	    maxCol = aux;
+	}
+	n_vect = (color0 <= color1) ? 2 : 3;
+
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	cc[0] = (dword)color0 | ((dword)color1 << 16);
+	hi = 0;
+	/* walk backwards so that texel 0 ends up in the lowest bits */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel = 3;
+	    int sum = 0;
+	    if (black) {
+		for (i = 0; i < n_comp; i++) {
+		    sum += input[k][i];
+		}
+	    }
+	    if (!black || sum) {
+		/* interpolate color */
+		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		texel = dxtn_color_tlat[black][texel];
+	    }
+	    /* add in texel */
+	    hi <<= 2;
+	    hi |= texel;
+	}
+    }
+    cc[1] = hi;
+}
+
+
+/*
+ * Quantize one 4x4 tile into a DXT1 block with 1-bit transparency.
+ *
+ *   cc     out: cc[0] receives color0 | (color1 << 16), both RGB565;
+ *          cc[1] receives the sixteen 2-bit selectors, texel 0 in the
+ *          least significant bits.
+ *   lines  in: one pointer per tile row; each pointer is advanced past the
+ *          texels that are read.
+ *   comps  number of source components per texel; with 3 components the
+ *          tile is treated as fully opaque.
+ *
+ * A texel whose alpha is below 128 is stored with the reserved selector 3.
+ */
+static void
+dxt1_rgba_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];	/* interpolation vector */
+
+    dword hi;		/* high doubleword */
+    int color0, color1;
+    int n_vect;
+    const int n_comp = 3;
+    int transparent = 0;
+
+#ifndef YUV
+    int minSum = 2000;          /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1;		/* small enough */
+    int minCol = 0;		/* phoudoin: silent compiler! */
+    int maxCol = 0;		/* phoudoin: silent compiler! */
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    input[k + l * 4][2] = *lines[l]++;
+	    input[k + l * 4][1] = *lines[l]++;
+	    input[k + l * 4][0] = *lines[l]++;
+	    if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+	sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+	if (input[k][ACOMP] < 128) {
+	    transparent = 1;
+	}
+    }
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+    if (color0 == color1) {
+	/* we'll use 3-vector */
+	/* shift as dword: color1 may be >= 0x8000, and shifting that by 16
+	 * in a signed int is undefined behaviour */
+	cc[0] = (dword)color0 | ((dword)color1 << 16);
+	/* all selectors 3 or all selectors 0 */
+	hi = transparent ? -1 : 0;
+    } else {
+	/* Order the endpoints so that the decoder picks the wanted mode:
+	 * color0 > color1 for a fully opaque tile, color0 <= color1 when
+	 * selector 3 must mean "transparent".
+	 */
+	if (transparent ^ (color0 <= color1)) {
+	    int aux;
+	    aux = color0;
+	    color0 = color1;
+	    color1 = aux;
+	    aux = minCol;
+	    minCol = maxCol;
+	    maxCol = aux;
+	}
+	n_vect = (color0 <= color1) ? 2 : 3;
+
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	cc[0] = (dword)color0 | ((dword)color1 << 16);
+	hi = 0;
+	/* walk backwards so that texel 0 ends up in the lowest bits */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel = 3;
+	    if (input[k][ACOMP] >= 128) {
+		/* interpolate color */
+		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		texel = dxtn_color_tlat[transparent][texel];
+	    }
+	    /* add in texel */
+	    hi <<= 2;
+	    hi |= texel;
+	}
+    }
+    cc[1] = hi;
+}
+
+
+/*
+ * Quantize one 4x4 tile into a DXT3 block (explicit 4-bit alpha).
+ *
+ *   cc     out, four dwords: cc[0] holds the 4-bit alphas of texels 0..7,
+ *          cc[1] those of texels 8..15 (lowest texel in the lowest bits),
+ *          cc[2] holds color0 | (color1 << 16) (both RGB565) and cc[3]
+ *          the sixteen 2-bit color selectors.
+ *   lines  in: one pointer per tile row; each pointer is advanced past the
+ *          texels that are read.
+ *   comps  number of source components per texel; with 3 components the
+ *          tile is treated as fully opaque.
+ */
+static void
+dxt3_rgba_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];	/* interpolation vector */
+
+    dword lolo, lohi;	/* low quadword: lo dword, hi dword */
+    dword hihi;		/* high quadword: high dword */
+    int color0, color1;
+    const int n_vect = 3;
+    const int n_comp = 3;
+
+#ifndef YUV
+    int minSum = 2000;          /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1;		/* small enough */
+    int minCol = 0;		/* phoudoin: silent compiler! */
+    int maxCol = 0;		/* phoudoin: silent compiler! */
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    input[k + l * 4][2] = *lines[l]++;
+	    input[k + l * 4][1] = *lines[l]++;
+	    input[k + l * 4][0] = *lines[l]++;
+	    if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+	sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+    }
+
+    /* add in alphas: keep the top 4 bits of each 8-bit alpha */
+    lolo = lohi = 0;
+    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	/* add in alpha */
+	lohi <<= 4;
+	lohi |= input[k][ACOMP] >> 4;
+    }
+    cc[1] = lohi;
+    /* k continues downwards from N_TEXELS / 2 - 1 */
+    for (; k >= 0; k--) {
+	/* add in alpha */
+	lolo <<= 4;
+	lolo |= input[k][ACOMP] >> 4;
+    }
+    cc[0] = lolo;
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+#ifdef RADEON
+    /* H.Morii - Workaround for ATI Radeon
+     * According to the OpenGL EXT_texture_compression_s3tc specs,
+     * the encoding of the RGB components for DXT3 and DXT5 formats
+     * use the non-transparent encodings of DXT1 but treated as
+     * though color0 > color1, regardless of the actual values of
+     * color0 and color1. ATI Radeons however require the values to
+     * be color0 > color1.
+     */
+    if (color0 < color1) {
+	int aux;
+	aux = color0;
+	color0 = color1;
+	color1 = aux;
+	aux = minCol;
+	minCol = maxCol;
+	maxCol = aux;
+    }
+#endif
+
+    /* shift as dword: color1 may be >= 0x8000, and shifting that by 16 in
+     * a signed int is undefined behaviour */
+    cc[2] = (dword)color0 | ((dword)color1 << 16);
+
+    /* identical endpoints: every selector stays 0 */
+    hihi = 0;
+    if (color0 != color1) {
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    texel = dxtn_color_tlat[0][texel];
+	    /* add in texel */
+	    hihi <<= 2;
+	    hihi |= texel;
+	}
+    }
+    cc[3] = hihi;
+}
+
+
+/*
+ * Quantize one 4x4 tile into a DXT5 block (interpolated alpha).
+ *
+ *   cc     out, four dwords: cc[0..1] hold alpha0, alpha1 and the sixteen
+ *          3-bit alpha selectors, cc[2] holds color0 | (color1 << 16)
+ *          (both RGB565) and cc[3] the sixteen 2-bit color selectors.
+ *   lines  in: one pointer per tile row; each pointer is advanced past the
+ *          texels that are read.
+ *   comps  number of source components per texel; with 3 components the
+ *          tile is treated as fully opaque.
+ */
+static void
+dxt5_rgba_quantize (dword *cc, const byte *lines[], int comps)
+{
+    float b, iv[MAX_COMP];	/* interpolation vector */
+
+    qword lo;			/* low quadword */
+    dword hihi;		/* high quadword: high dword */
+    int color0, color1;
+    const int n_vect = 3;
+    const int n_comp = 3;
+
+#ifndef YUV
+    int minSum = 2000;          /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1;		/* small enough */
+    int minCol = 0;		/* phoudoin: silent compiler! */
+    int maxCol = 0;		/* phoudoin: silent compiler! */
+    int alpha0 = 2000;		/* big enough */
+    int alpha1 = -1;		/* small enough */
+    int anyZero = 0, anyOne = 0;
+    int a_vect;
+
+    byte input[N_TEXELS][MAX_COMP];
+    int i, k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 4 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    input[k + l * 4][2] = *lines[l]++;
+	    input[k + l * 4][1] = *lines[l]++;
+	    input[k + l * 4][0] = *lines[l]++;
+	    if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     * The smallest and largest alpha are collected in the same pass.
+     */
+    for (k = 0; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+	sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+	if (alpha0 > input[k][ACOMP]) {
+	    alpha0 = input[k][ACOMP];
+	}
+	if (alpha1 < input[k][ACOMP]) {
+	    alpha1 = input[k][ACOMP];
+	}
+	if (input[k][ACOMP] == 0) {
+	    anyZero = 1;
+	}
+	if (input[k][ACOMP] == 255) {
+	    anyOne = 1;
+	}
+    }
+
+    /* add in alphas */
+    if (alpha0 == alpha1) {
+	/* we'll use 6-vector */
+	/* constant alpha: every selector is 0, i.e. alpha0 */
+	cc[0] = alpha0 | (alpha1 << 8);
+	cc[1] = 0;
+    } else {
+	if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) {
+	    /* we still might use 8-vector */
+	    anyZero = 0;
+	}
+	if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) {
+	    /* we still might use 8-vector */
+	    anyOne = 0;
+	}
+	/* Order the endpoints so that the decoder picks the wanted mode:
+	 * alpha0 > alpha1 for eight interpolated values, alpha0 <= alpha1
+	 * when selectors 6 and 7 must mean 0 and 255.
+	 */
+	if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) {
+	    int aux;
+	    aux = alpha0;
+	    alpha0 = alpha1;
+	    alpha1 = aux;
+	}
+	a_vect = (alpha0 <= alpha1) ? 5 : 7;
+
+	/* compute interpolation vector */
+	iv[ACOMP] = (float)a_vect / (alpha1 - alpha0);
+	b = -iv[ACOMP] * alpha0 + 0.5F;
+
+	/* add in alphas */
+	Q_MOV32(lo, 0);
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel = -1;
+	    if (anyZero | anyOne) {
+		/* exact 0 and 255 get their dedicated selectors */
+		if (input[k][ACOMP] == 0) {
+		    texel = 6;
+		} else if (input[k][ACOMP] == 255) {
+		    texel = 7;
+		}
+	    }
+	    /* interpolate alpha */
+	    if (texel == -1) {
+		float dot = input[k][ACOMP] * iv[ACOMP];
+		texel = (int)(dot + b);
+#if SAFECDOT
+		if (texel < 0) {
+		    texel = 0;
+		} else if (texel > a_vect) {
+		    texel = a_vect;
+		}
+#endif
+		texel = dxtn_alpha_tlat[anyZero | anyOne][texel];
+	    }
+	    /* add in texel */
+	    Q_SHL(lo, 3);
+	    Q_OR32(lo, texel);
+	}
+	/* the two endpoint bytes sit below the 48 selector bits */
+	Q_SHL(lo, 16);
+	Q_OR32(lo, alpha0 | (alpha1 << 8));
+	((qword *)cc)[0] = lo;
+    }
+
+    color0 = COLOR565(input[minCol]);
+    color1 = COLOR565(input[maxCol]);
+
+#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */
+    if (color0 < color1) {
+	int aux;
+	aux = color0;
+	color0 = color1;
+	color1 = aux;
+	aux = minCol;
+	minCol = maxCol;
+	maxCol = aux;
+    }
+#endif
+
+    /* shift as dword: color1 may be >= 0x8000, and shifting that by 16 in
+     * a signed int is undefined behaviour */
+    cc[2] = (dword)color0 | ((dword)color1 << 16);
+
+    /* identical endpoints: every selector stays 0 */
+    hihi = 0;
+    if (color0 != color1) {
+	MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);
+
+	/* add in texels */
+	for (k = N_TEXELS - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    texel = dxtn_color_tlat[0][texel];
+	    /* add in texel */
+	    hihi <<= 2;
+	    hihi |= texel;
+	}
+    }
+    cc[3] = hihi;
+}
+
+
+/*
+ * ENCODER(dxtn, n) defines the public entry point dxtn##_encode.
+ *
+ * The generated function walks the image in 4x4 tiles, hands each tile to
+ * dxtn##_quantize and advances the output by n dwords per tile.  When
+ * width or height is not a multiple of 4 the image is first replicated
+ * into a temporary buffer whose dimensions are rounded up to the next
+ * multiple of 4.
+ *
+ *   width, height   image size in texels
+ *   comps           components per source texel
+ *   source          source image, srcRowStride bytes per row
+ *   dest            output blocks, destRowStride bytes per row of blocks
+ *
+ * Returns 0 on success, or -1 when the temporary buffer cannot be
+ * allocated (nothing is written to dest in that case).
+ *
+ * NOTE(review): the temporary buffer is sized with sizeof(byte *) rather
+ * than sizeof(byte), which looks like an over-allocation; it is kept as is
+ * because _mesa_upscale_teximage2d is not visible here - confirm before
+ * shrinking it.
+ */
+#define ENCODER(dxtn, n)						\
+int TAPIENTRY								\
+dxtn##_encode (int width, int height, int comps,			\
+	       const void *source, int srcRowStride,			\
+	       void *dest, int destRowStride)				\
+{									\
+    int x, y;								\
+    const byte *data;							\
+    dword *encoded = (dword *)dest;					\
+    void *newSource = NULL;						\
+									\
+    /* Replicate image if width is not M4 or height is not M4 */	\
+    if ((width & 3) | (height & 3)) {					\
+	int newWidth = (width + 3) & ~3;				\
+	int newHeight = (height + 3) & ~3;				\
+	newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));\
+	if (newSource == NULL) {					\
+	    /* out of memory: do not hand NULL to the upscaler */	\
+	    return -1;							\
+	}								\
+	_mesa_upscale_teximage2d(width, height, newWidth, newHeight,	\
+                               comps, (const byte *)source,		\
+			       srcRowStride, (byte *)newSource);	\
+	source = newSource;						\
+	width = newWidth;						\
+	height = newHeight;						\
+	srcRowStride = comps * newWidth;				\
+    }									\
+									\
+    data = (const byte *)source;					\
+    /* padding at the end of each output row, in dwords */		\
+    destRowStride = (destRowStride - width * n) / 4;			\
+    for (y = 0; y < height; y += 4) {					\
+	unsigned int offs = 0 + (y + 0) * srcRowStride;			\
+	for (x = 0; x < width; x += 4) {				\
+	    const byte *lines[4];					\
+	    lines[0] = &data[offs];					\
+	    lines[1] = lines[0] + srcRowStride;				\
+	    lines[2] = lines[1] + srcRowStride;				\
+	    lines[3] = lines[2] + srcRowStride;				\
+	    offs += 4 * comps;						\
+	    dxtn##_quantize(encoded, lines, comps);			\
+	    /* 4x4 block */						\
+	    encoded += n;						\
+	}								\
+	encoded += destRowStride;					\
+    }									\
+									\
+    if (newSource != NULL) {						\
+	free(newSource);						\
+    }									\
+									\
+    return 0;								\
+}
+
+ENCODER(dxt1_rgb,  2)
+ENCODER(dxt1_rgba, 2)
+ENCODER(dxt3_rgba, 4)
+ENCODER(dxt5_rgba, 4)
+
+
+/***************************************************************************\
+ * DXTn decoder
+ *
+ * The decoder is based on GL_EXT_texture_compression_s3tc
+ * specification and serves as a concept for the encoder.
+\***************************************************************************/
+
+
+/* lookup table for scaling 4 bit colors up to 8 bits */
+static const byte _rgb_scale_4[] = {
+    0,   17,  34,  51,  68,  85,  102, 119,
+    136, 153, 170, 187, 204, 221, 238, 255
+};
+
+/* lookup table for scaling 5 bit colors up to 8 bits */
+static const byte _rgb_scale_5[] = {
+    0,   8,   16,  25,  33,  41,  49,  58,
+    66,  74,  82,  90,  99,  107, 115, 123,
+    132, 140, 148, 156, 165, 173, 181, 189,
+    197, 206, 214, 222, 230, 239, 247, 255
+};
+
+/* lookup table for scaling 6 bit colors up to 8 bits */
+static const byte _rgb_scale_6[] = {
+    0,   4,   8,   12,  16,  20,  24,  28,
+    32,  36,  40,  45,  49,  53,  57,  61,
+    65,  69,  73,  77,  81,  85,  89,  93,
+    97,  101, 105, 109, 113, 117, 121, 125,
+    130, 134, 138, 142, 146, 150, 154, 158,
+    162, 166, 170, 174, 178, 182, 186, 190,
+    194, 198, 202, 206, 210, 215, 219, 223,
+    227, 231, 235, 239, 243, 247, 251, 255
+};
+
+
+#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
+#define UP4(c) _rgb_scale_4[(c) & 15]
+#define UP5(c) _rgb_scale_5[(c) & 31]
+#define UP6(c) _rgb_scale_6[(c) & 63]
+#define ZERO_4UBV(v) *((dword *)(v)) = 0
+
+
+/*
+ * Decode the single texel (i, j) of a DXT1 texture into rgba[].
+ * Blocks are 8 bytes; a row of the texture holds (stride + 3) / 4 blocks.
+ * The alpha component is always written as 255, including for the
+ * reserved selector, which yields black.
+ */
+void TAPIENTRY
+dxt1_rgb_decode_1 (const void *texture, int stride,
+		   int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *block = (const byte *)texture
+			 + ((j / 4) * blocksPerRow + i / 4) * 8;
+    /* one selector byte per block row, two bits per texel */
+    const int sel = (block[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+
+    switch (sel) {
+    case 0:
+	/* color0 as stored */
+	rgba[RCOMP] = UP5(CC_SEL(block, 11));
+	rgba[GCOMP] = UP6(CC_SEL(block,  5));
+	rgba[BCOMP] = UP5(CC_SEL(block,  0));
+	break;
+    case 1:
+	/* color1 as stored */
+	rgba[RCOMP] = UP5(CC_SEL(block, 27));
+	rgba[GCOMP] = UP6(CC_SEL(block, 21));
+	rgba[BCOMP] = UP5(CC_SEL(block, 16));
+	break;
+    default: {
+	const word col0 = block[0] | (block[1] << 8);
+	const word col1 = block[2] | (block[3] << 8);
+	if (col0 > col1) {
+	    /* selector 2 leans towards color0, selector 3 towards color1 */
+	    const int w0 = (sel == 2) ? 2 : 1;
+	    const int w1 = 3 - w0;
+	    rgba[RCOMP] = (UP5(col0 >> 11) * w0 + UP5(col1 >> 11) * w1) / 3;
+	    rgba[GCOMP] = (UP6(col0 >>  5) * w0 + UP6(col1 >>  5) * w1) / 3;
+	    rgba[BCOMP] = (UP5(col0      ) * w0 + UP5(col1      ) * w1) / 3;
+	} else if (sel == 2) {
+	    /* midpoint of the two endpoints */
+	    rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
+	    rgba[GCOMP] = (UP6(col0 >>  5) + UP6(col1 >>  5)) / 2;
+	    rgba[BCOMP] = (UP5(col0      ) + UP5(col1      )) / 2;
+	} else {
+	    ZERO_4UBV(rgba);
+	}
+	break;
+    }
+    }
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Decode the single texel (i, j) of a DXT1 texture into rgba[].
+ * Identical to dxt1_rgb_decode_1 except that the reserved selector
+ * (selector 3 with color0 <= color1) yields all four components zero,
+ * i.e. a transparent texel; every other case gets alpha 255.
+ */
+void TAPIENTRY
+dxt1_rgba_decode_1 (const void *texture, int stride,
+		    int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *block = (const byte *)texture
+			 + ((j / 4) * blocksPerRow + i / 4) * 8;
+    /* one selector byte per block row, two bits per texel */
+    const int sel = (block[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+
+    switch (sel) {
+    case 0:
+	/* color0 as stored */
+	rgba[RCOMP] = UP5(CC_SEL(block, 11));
+	rgba[GCOMP] = UP6(CC_SEL(block,  5));
+	rgba[BCOMP] = UP5(CC_SEL(block,  0));
+	break;
+    case 1:
+	/* color1 as stored */
+	rgba[RCOMP] = UP5(CC_SEL(block, 27));
+	rgba[GCOMP] = UP6(CC_SEL(block, 21));
+	rgba[BCOMP] = UP5(CC_SEL(block, 16));
+	break;
+    default: {
+	const word col0 = block[0] | (block[1] << 8);
+	const word col1 = block[2] | (block[3] << 8);
+	if (col0 > col1) {
+	    /* selector 2 leans towards color0, selector 3 towards color1 */
+	    const int w0 = (sel == 2) ? 2 : 1;
+	    const int w1 = 3 - w0;
+	    rgba[RCOMP] = (UP5(col0 >> 11) * w0 + UP5(col1 >> 11) * w1) / 3;
+	    rgba[GCOMP] = (UP6(col0 >>  5) * w0 + UP6(col1 >>  5) * w1) / 3;
+	    rgba[BCOMP] = (UP5(col0      ) * w0 + UP5(col1      ) * w1) / 3;
+	} else if (sel == 2) {
+	    /* midpoint of the two endpoints */
+	    rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
+	    rgba[GCOMP] = (UP6(col0 >>  5) + UP6(col1 >>  5)) / 2;
+	    rgba[BCOMP] = (UP5(col0      ) + UP5(col1      )) / 2;
+	} else {
+	    /* transparent texel: alpha must stay 0, so leave right away */
+	    ZERO_4UBV(rgba);
+	    return;
+	}
+	break;
+    }
+    }
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Decode the single texel (i, j) of a DXT3 texture into rgba[].
+ * Blocks are 16 bytes: 8 bytes of 4-bit alphas followed by an 8-byte color
+ * part that is always decoded as four colors.
+ */
+void TAPIENTRY
+dxt3_rgba_decode_1 (const void *texture, int stride,
+		    int i, int j, byte *rgba)
+{
+    const int blocksPerRow = (stride + 3) / 4;
+    const byte *block = (const byte *)texture
+			 + ((j / 4) * blocksPerRow + i / 4) * 16;
+    const int sel = (block[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+    const dword *cc = (const dword *)(block + 8);
+    /* position of the texel inside its block, 0..15 */
+    const int texel = (j & 3) * 4 + (i & 3);
+    /* both endpoints, expanded to 8 bits per component */
+    const int r0 = UP5(CC_SEL(cc, 11)), r1 = UP5(CC_SEL(cc, 27));
+    const int g0 = UP6(CC_SEL(cc,  5)), g1 = UP6(CC_SEL(cc, 21));
+    const int b0 = UP5(CC_SEL(cc,  0)), b1 = UP5(CC_SEL(cc, 16));
+
+    switch (sel) {
+    case 0:
+	rgba[RCOMP] = r0;
+	rgba[GCOMP] = g0;
+	rgba[BCOMP] = b0;
+	break;
+    case 1:
+	rgba[RCOMP] = r1;
+	rgba[GCOMP] = g1;
+	rgba[BCOMP] = b1;
+	break;
+    case 2:
+	/* two parts color0, one part color1 */
+	rgba[RCOMP] = (r0 * 2 + r1) / 3;
+	rgba[GCOMP] = (g0 * 2 + g1) / 3;
+	rgba[BCOMP] = (b0 * 2 + b1) / 3;
+	break;
+    default:
+	/* one part color0, two parts color1 */
+	rgba[RCOMP] = (r0 + 2 * r1) / 3;
+	rgba[GCOMP] = (g0 + 2 * g1) / 3;
+	rgba[BCOMP] = (b0 + 2 * b1) / 3;
+	break;
+    }
+    /* two alphas per byte: even columns in the low nibble */
+    rgba[ACOMP] = UP4(block[texel / 2] >> ((i & 1) * 4));
+}
+
+
+/*
+ * Decode the single texel (i, j) of a DXT5 texture into rgba[].
+ * Blocks are 16 bytes: alpha0, alpha1, sixteen 3-bit alpha selectors
+ * (bytes 2..7), then an 8-byte color part that is always decoded as four
+ * colors.
+ *
+ * The alpha selector is assembled from individual bytes.  The previous
+ * code fetched it with a dword load from src + 2, which is a misaligned
+ * access whenever the block itself is dword aligned; the bytewise read
+ * returns the same bits on little-endian hosts.
+ */
+void TAPIENTRY
+dxt5_rgba_decode_1 (const void *texture, int stride,
+		    int i, int j, byte *rgba)
+{
+    const byte *src = (const byte *)texture
+		       + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
+    const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
+    const dword *cc = (const dword *)(src + 8);
+    const byte alpha0 = src[0];
+    const byte alpha1 = src[1];
+    /* bit offset of this texel's selector in the 48-bit field at src[2] */
+    const int alphaBit = (((j & 3) * 4) + (i & 3)) * 3;
+    const byte *abits = src + 2 + (alphaBit >> 3);
+    unsigned int window = abits[0];
+    int acode;
+
+    /* a selector starting at bit 6 or 7 spills into the next byte; the
+     * last selector starts at bit 5 of src[7], so src[7] is never passed */
+    if ((alphaBit & 7) > 5) {
+	window |= (unsigned int)abits[1] << 8;
+    }
+    acode = (int)((window >> (alphaBit & 7)) & 0x7);
+
+    if (code == 0) {
+	rgba[RCOMP] = UP5(CC_SEL(cc, 11));
+	rgba[GCOMP] = UP6(CC_SEL(cc,  5));
+	rgba[BCOMP] = UP5(CC_SEL(cc,  0));
+    } else if (code == 1) {
+	rgba[RCOMP] = UP5(CC_SEL(cc, 27));
+	rgba[GCOMP] = UP6(CC_SEL(cc, 21));
+	rgba[BCOMP] = UP5(CC_SEL(cc, 16));
+    } else if (code == 2) {
+	/* (col0 * (4 - code) + col1 * (code - 1)) / 3 */
+	rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
+	rgba[GCOMP] = (UP6(CC_SEL(cc,  5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
+	rgba[BCOMP] = (UP5(CC_SEL(cc,  0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
+    } else {
+	rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
+	rgba[GCOMP] = (UP6(CC_SEL(cc,  5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
+	rgba[BCOMP] = (UP5(CC_SEL(cc,  0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
+    }
+    if (acode == 0) {
+	rgba[ACOMP] = alpha0;
+    } else if (acode == 1) {
+	rgba[ACOMP] = alpha1;
+    } else if (alpha0 > alpha1) {
+	/* eight-value mode: selectors 2..7 interpolate in sevenths */
+	rgba[ACOMP] = ((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7;
+    } else if (acode == 6) {
+	rgba[ACOMP] = 0;
+    } else if (acode == 7) {
+	rgba[ACOMP] = 255;
+    } else {
+	/* six-value mode: selectors 2..5 interpolate in fifths */
+	rgba[ACOMP] = ((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5;
+    }
+}
diff --git a/GLideNHQ/tc-1.1+/dxtn.h b/GLideNHQ/tc-1.1+/dxtn.h
new file mode 100644
index 00000000..4078fd9f
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/dxtn.h
@@ -0,0 +1,62 @@
+/*
+ * DXTn codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+/* Public entry points of the DXTn codec: four encoders, four texel decoders.
+ * NOTE(review): TAPI, TAPIENTRY and byte must be declared by the includer. */
+#ifndef DXTN_H_included
+#define DXTN_H_included
+/* Encoders. NOTE(review): return value semantics are not visible here. */
+TAPI int TAPIENTRY
+dxt1_rgb_encode (int width, int height, int comps,
+		 const void *source, int srcRowStride,
+		 void *dest, int destRowStride);
+
+TAPI int TAPIENTRY
+dxt1_rgba_encode (int width, int height, int comps,
+		  const void *source, int srcRowStride,
+		  void *dest, int destRowStride);
+
+TAPI int TAPIENTRY
+dxt3_rgba_encode (int width, int height, int comps,
+		  const void *source, int srcRowStride,
+		  void *dest, int destRowStride);
+
+TAPI int TAPIENTRY
+dxt5_rgba_encode (int width, int height, int comps,
+		  const void *source, int srcRowStride,
+		  void *dest, int destRowStride);
+/* Single-texel decoders; presumably each stores texel (i, j) into rgba[]. */
+TAPI void TAPIENTRY
+dxt1_rgb_decode_1 (const void *texture, int stride /* in pixels */,
+		   int i, int j, byte *rgba);
+
+TAPI void TAPIENTRY
+dxt1_rgba_decode_1 (const void *texture, int stride /* in pixels */,
+		    int i, int j, byte *rgba);
+
+TAPI void TAPIENTRY
+dxt3_rgba_decode_1 (const void *texture, int stride /* in pixels */,
+		    int i, int j, byte *rgba);
+
+TAPI void TAPIENTRY
+dxt5_rgba_decode_1 (const void *texture, int stride /* in pixels */,
+		    int i, int j, byte *rgba);
+
+#endif
diff --git a/GLideNHQ/tc-1.1+/fxt1.c b/GLideNHQ/tc-1.1+/fxt1.c
new file mode 100644
index 00000000..1287ced5
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/fxt1.c
@@ -0,0 +1,1459 @@
+/*
+ * FXT1 codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * Added support for ARGB inputs.
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "internal.h"
+#include "fxt1.h"
+
+
+/***************************************************************************\
+ * FXT1 encoder
+ *
+ * The encoder was built by reversing the decoder,
+ * and is vaguely based on Texus2 by 3dfx. Note that this code
+ * is merely a proof of concept, since it is highly UNoptimized;
+ * moreover, it is sub-optimal due to initial conditions passed
+ * to Lloyd's algorithm (the interpolation modes are even worse).
+\***************************************************************************/
+/* Encoder tuning constants and helpers. ISTBLACK and COPY_4UBV access a texel
+ * through a dword pointer, i.e. they handle all of its bytes as one dword. */
+#define MAX_COMP 4 /* ever needed maximum number of components in texel */
+#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
+#define N_TEXELS 32 /* number of texels in a block (always 32) */
+#define LL_N_REP 50 /* number of iterations in lloyd's vq */
+#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
+#define LL_RMS_E 255 /* fault tolerance (maximum error) */
+#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
+#define ISTBLACK(v) (*((dword *)(v)) == 0)
+#define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC))
+/* Return the index of the vector in vec[0..nv) with the smallest squared
+ * distance to `input' over the first nc components, or -1 if none beats 1e9. */
+static int
+fxt1_bestcol (float vec[][MAX_COMP], int nv,
+	      byte input[MAX_COMP], int nc)
+{
+    int c, v, winner = -1;
+    float lowest = 1e9; /* a distance at or above this never wins */
+
+    for (v = 0; v < nv; v++) {
+	float dist = 0.0F;
+	for (c = 0; c < nc; c++) {
+	    dist += (vec[v][c] - input[c]) * (vec[v][c] - input[c]);
+	}
+	if (!(dist < lowest)) {
+	    continue;
+	}
+	lowest = dist;
+	winner = v;
+    }
+    return winner;
+}
+/* Return the index of the texel among input[0..n) whose squared distance to
+ * `vec' (first nc components) is largest; -1 if no texel qualifies (n <= 0). */
+static int
+fxt1_worst (float vec[MAX_COMP],
+	    byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+    int c, t, farthest = -1;
+    float highest = -1.0F; /* below any squared distance */
+
+    for (t = 0; t < n; t++) {
+	byte *texel = input[t];
+	float dist = 0.0F;
+	for (c = 0; c < nc; c++) {
+	    dist += (vec[c] - texel[c]) * (vec[c] - texel[c]);
+	}
+	if (dist > highest) {
+	    highest = dist;
+	    farthest = t;
+	}
+    }
+    return farthest;
+}
+/* Return the channel (of the first nc) whose variance over input[0..n) is the
+ * largest; when `variance' is non-NULL each channel's variance is stored too. */
+static int
+fxt1_variance (double variance[MAX_COMP],
+	       byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+    const double inv_n = 1.0 / n; /* reciprocal of the sample count */
+    double top = -1; /* small enough */
+    int ch, widest = 0;
+
+    for (ch = 0; ch < nc; ch++) {
+	dword sx = 0, sx2 = 0; /* sum and sum of squares of this channel */
+	double var;
+	int k;
+	for (k = 0; k < n; k++) {
+	    int t = input[k][ch];
+	    sx += t;
+	    sx2 += t * t;
+	}
+	var = sx2 * inv_n - sx * sx * inv_n * inv_n;
+	if (variance) {
+	    variance[ch] = var;
+	}
+	if (top < var) {
+	    top = var;
+	    widest = ch;
+	}
+    }
+    return widest;
+}
+/* Seed vec[0..nv) for Lloyd's algorithm from input[0..n). Returns 0 when the
+ * block has at most nv distinct colors (vec then holds them), else non-zero. */
+static int
+fxt1_choose (float vec[][MAX_COMP], int nv,
+	     byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+#if 0
+    /* Choose colors from a grid.
+     */
+    int i, j;
+
+    for (j = 0; j < nv; j++) {
+	int m = j * (n - 1) / (nv - 1);
+	for (i = 0; i < nc; i++) {
+	    vec[j][i] = input[m][i];
+	}
+    }
+#else
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 8x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    int i, j, k;
+#ifndef YUV
+    int minSum = 2000; /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1; /* small enough */
+    int minCol = 0; /* phoudoin: silent compiler! */
+    int maxCol = 0; /* phoudoin: silent compiler! */
+
+    struct {
+	int flag;
+	dword key;
+	int freq;
+	int idx;
+    } hist[N_TEXELS];
+    int lenh = 0;
+
+    memset(hist, 0, sizeof(hist));
+
+    for (k = 0; k < n; k++) {
+	int l;
+	dword key = 0;
+	int sum = 0;
+	for (i = 0; i < nc; i++) {
+	    key <<= 8;
+	    key |= input[k][i];
+#ifndef YUV
+	    sum += input[k][i];
+#else
+            /* RGB to YUV conversion according to CCIR 601 specs
+             * Y = 0.299R+0.587G+0.114B
+             * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
+             * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
+             */
+            sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+        }
+	for (l = 0; l < n; l++) {
+	    if (!hist[l].flag) {
+		/* alloc new slot */
+		hist[l].flag = !0;
+		hist[l].key = key;
+		hist[l].freq = 1;
+		hist[l].idx = k;
+		lenh = l + 1;
+		break;
+	    } else if (hist[l].key == key) {
+		hist[l].freq++;
+		break;
+	    }
+	}
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+    }
+
+    if (lenh <= nv) { /* few enough distinct colors: use them verbatim */
+	for (j = 0; j < lenh; j++) {
+	    for (i = 0; i < nc; i++) {
+		vec[j][i] = (float)input[hist[j].idx][i];
+	    }
+	}
+	for (; j < nv; j++) { /* pad the unused vectors with vec[0] */
+	    for (i = 0; i < nc; i++) {
+		vec[j][i] = vec[0][i];
+	    }
+	}
+	return 0;
+    }
+
+    for (j = 0; j < nv; j++) { /* spread vectors from minCol to maxCol */
+	for (i = 0; i < nc; i++) {
+	    vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
+	}
+    }
+#endif
+
+    return !0;
+}
+/* Refine vec[0..nv) in place with Lloyd iterations over input[0..n). Returns
+ * non-zero on a good match, 0 if LL_N_REP iterations did not converge. */
+static int
+fxt1_lloyd (float vec[][MAX_COMP], int nv,
+	    byte input[N_TEXELS][MAX_COMP], int nc, int n)
+{
+    /* Use the generalized lloyd's algorithm for VQ:
+     *     find 4 color vectors.
+     *
+     *     for each sample color
+     *         sort to nearest vector.
+     *
+     *     replace each vector with the centroid of its matching colors.
+     *
+     *     repeat until RMS doesn't improve.
+     *
+     *     if a color vector has no samples, or becomes the same as another
+     *     vector, replace it with the color which is farthest from a sample.
+     *
+     * vec[][MAX_COMP]           initial vectors and resulting colors
+     * nv                        number of resulting colors required
+     * input[N_TEXELS][MAX_COMP] input texels
+     * nc                        number of components in input / vec
+     * n                         number of input samples
+     */
+
+    int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
+    int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
+    float error, lasterror = 1e9;
+
+    int i, j, k, rep;
+
+    /* the quantizer */
+    for (rep = 0; rep < LL_N_REP; rep++) {
+	/* reset sums & counters */
+	for (j = 0; j < nv; j++) {
+	    for (i = 0; i < nc; i++) {
+		sum[j][i] = 0;
+	    }
+	    cnt[j] = 0;
+	}
+	error = 0;
+
+	/* scan whole block */
+	for (k = 0; k < n; k++) {
+#if 1
+	    int best = -1;
+	    float err = 1e9; /* big enough */
+	    /* determine best vector */
+	    for (j = 0; j < nv; j++) {
+		float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
+			  (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
+			  (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
+		if (nc == 4) {
+		    e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
+		}
+		if (e < err) {
+		    err = e;
+		    best = j;
+		}
+	    }
+#else
+	    int best = fxt1_bestcol(vec, nv, input[k], nc, &err);
+#endif
+	    /* add in closest color */
+	    for (i = 0; i < nc; i++) {
+		sum[best][i] += input[k][i];
+	    }
+	    /* mark this vector as used */
+	    cnt[best]++;
+	    /* accumulate error */
+	    error += err;
+	}
+
+	/* check RMS: stop when the error is small or barely improved */
+	if ((error < LL_RMS_E) ||
+	    ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
+	    return !0; /* good match */
+	}
+	lasterror = error;
+
+	/* move each vector to the barycenter of its closest colors */
+	for (j = 0; j < nv; j++) {
+	    if (cnt[j]) {
+		float div = 1.0F / cnt[j];
+		for (i = 0; i < nc; i++) {
+		    vec[j][i] = div * sum[j][i];
+		}
+	    } else {
+		/* this vec has no samples or is identical with a previous vec */
+		int worst = fxt1_worst(vec[j], input, nc, n);
+		for (i = 0; i < nc; i++) {
+		    vec[j][i] = input[worst][i];
+		}
+	    }
+	}
+    }
+
+    return 0; /* could not converge fast enough */
+}
+/* Encode the block in cc-chroma mode: four RGB base colors are fitted to the
+ * 32 texels and every texel stores the 2-bit index of its closest color. */
+static void
+fxt1_quantize_CHROMA (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 4; /* 4 base vectors to find */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    float vec[MAX_VECT][MAX_COMP];
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    /* refine with Lloyd only when the seeds are not already exact */
+    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
+	fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
+    }
+
+    Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
+    for (j = n_vect - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+	}
+    }
+    ((qword *)cc)[1] = hi;
+
+    lohi = lolo = 0;
+    /* right microtile */
+    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	lohi <<= 2;
+	lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
+    }
+    /* left microtile */
+    for (; k >= 0; k--) {
+	lolo <<= 2;
+	lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
+    }
+    cc[1] = lohi;
+    cc[0] = lolo;
+}
+/* Encode the block in alpha mode with lerp = 0: three RGBA vectors fitted to the
+ * n texels of reord, plus an all-zero fourth vector used only for matching. */
+static void
+fxt1_quantize_ALPHA0 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP],
+		      byte reord[N_TEXELS][MAX_COMP], int n)
+{
+    const int n_vect = 3; /* 3 base vectors to find */
+    const int n_comp = 4; /* 4 components: R, G, B, A */
+    float vec[MAX_VECT][MAX_COMP];
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    /* the last vector indicates zero */
+    for (i = 0; i < n_comp; i++) {
+	vec[n_vect][i] = 0;
+    }
+
+    /* the first n texels in reord are guaranteed to be non-zero */
+    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
+	fxt1_lloyd(vec, n_vect, reord, n_comp, n);
+    }
+
+    Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
+    for (j = n_vect - 1; j >= 0; j--) {
+	/* add in alphas */
+	Q_SHL(hi, 5);
+	Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
+    }
+    for (j = n_vect - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp - 1; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+	}
+    }
+    ((qword *)cc)[1] = hi;
+
+    lohi = lolo = 0;
+    /* right microtile; n_vect + 1 also offers the zero vector as a match */
+    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	lohi <<= 2;
+	lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
+    }
+    /* left microtile */
+    for (; k >= 0; k--) {
+	lolo <<= 2;
+	lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
+    }
+    cc[1] = lohi;
+    cc[0] = lolo;
+}
+/* Encode the block in alpha mode with lerp = 1: each 4x4 microtile interpolates
+ * between one extreme of its own (vec[0] / vec[2]) and a shared vector vec[1]. */
+static void
+fxt1_quantize_ALPHA1 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 3; /* highest vector number in each microtile */
+    const int n_comp = 4; /* 4 components: R, G, B, A */
+    float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
+    float b, iv[MAX_COMP]; /* interpolation vector */
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    int minSum;
+    int maxSum;
+    int minColL = 0, maxColL = 0;
+    int minColR = 0, maxColR = 0;
+    int sumL = 0, sumR = 0;
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColL = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColL = k;
+	}
+	sumL += sum;
+    }
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColR = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColR = k;
+	}
+	sumR += sum;
+    }
+
+    /* choose the common vector (yuck!) */
+    {
+	int j1, j2;
+	int v1 = 0, v2 = 0;
+	float err = 1e9; /* big enough */
+	float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+	for (i = 0; i < n_comp; i++) {
+	    tv[0][i] = input[minColL][i];
+	    tv[1][i] = input[maxColL][i];
+	    tv[2][i] = input[minColR][i];
+	    tv[3][i] = input[maxColR][i];
+	}
+	/* find the closest pair made of one left and one right extreme */
+	for (j1 = 0; j1 < 2; j1++) {
+	    for (j2 = 2; j2 < 4; j2++) {
+		float e = 0.0F;
+		for (i = 0; i < n_comp; i++) {
+		    e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
+		}
+		if (e < err) {
+		    err = e;
+		    v1 = j1;
+		    v2 = j2;
+		}
+	    }
+	}
+	/* 1 - v1 and 5 - v2 select the extreme NOT used in the shared pair */
+	for (i = 0; i < n_comp; i++) {
+	    vec[0][i] = tv[1 - v1][i];
+	    vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
+	    vec[2][i] = tv[5 - v2][i];
+	}
+    }
+
+    /* left microtile */
+    cc[0] = 0;
+    if (minColL != maxColL) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+	/* add in texels */
+	lolo = 0;
+	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lolo <<= 2;
+	    lolo |= texel;
+	}
+
+	cc[0] = lolo;
+    }
+
+    /* right microtile */
+    cc[1] = 0;
+    if (minColR != maxColR) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
+
+	/* add in texels */
+	lohi = 0;
+	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lohi <<= 2;
+	    lohi |= texel;
+	}
+
+	cc[1] = lohi;
+    }
+
+    Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
+    for (j = n_vect - 1; j >= 0; j--) {
+	/* add in alphas */
+	Q_SHL(hi, 5);
+	Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));
+    }
+    for (j = n_vect - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp - 1; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, (dword)(vec[j][i] / 8.0F));
+	}
+    }
+    ((qword *)cc)[1] = hi;
+}
+/* Encode the block in cc-hi mode: the darkest and brightest of the n texels in
+ * reord are the endpoints; each texel gets a 3-bit index, 7 = transparent. */
+static void
+fxt1_quantize_HI (dword *cc,
+		  byte input[N_TEXELS][MAX_COMP],
+		  byte reord[N_TEXELS][MAX_COMP], int n)
+{
+    const int n_vect = 6; /* highest vector number */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    float b = 0.0F;       /* phoudoin: silent compiler! */
+    float iv[MAX_COMP];   /* interpolation vector */
+    int i, k;
+    dword hihi; /* high quadword: hi dword */
+
+#ifndef YUV
+    int minSum = 2000; /* big enough */
+#else
+    int minSum = 2000000;
+#endif
+    int maxSum = -1; /* small enough */
+    int minCol = 0; /* phoudoin: silent compiler! */
+    int maxCol = 0; /* phoudoin: silent compiler! */
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 8x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+    for (k = 0; k < n; k++) { /* scan reord: minCol/maxCol index into it */
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += reord[k][i];
+	}
+#else
+        sum = 299 * reord[k][RCOMP] + 587 * reord[k][GCOMP] +  114 * reord[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minCol = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxCol = k;
+	}
+    }
+
+    hihi = 0; /* cc-hi = "00" */
+    for (i = 0; i < n_comp; i++) {
+	/* add in colors */
+	hihi <<= 5;
+	hihi |= reord[maxCol][i] >> 3;
+    }
+    for (i = 0; i < n_comp; i++) {
+	/* add in colors */
+	hihi <<= 5;
+	hihi |= reord[minCol][i] >> 3;
+    }
+    cc[3] = hihi;
+    cc[0] = cc[1] = cc[2] = 0;
+
+    /* compute interpolation vector */
+    if (minCol != maxCol) {
+	MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
+    }
+
+    /* add in texels */
+    for (k = N_TEXELS - 1; k >= 0; k--) {
+	int t = k * 3; /* bit offset of this texel's 3-bit index */
+	dword *kk = (dword *)((byte *)cc + t / 8);
+	int texel = n_vect + 1; /* transparent black */
+
+	if (!ISTBLACK(input[k])) {
+	    if (minCol != maxCol) {
+		/* interpolate color */
+		CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		/* add in texel */
+		kk[0] |= texel << (t & 7);
+	    }
+	} else {
+	    /* add in texel */
+	    kk[0] |= texel << (t & 7);
+	}
+    }
+}
+/* Encode in mixed mode with transparency: per microtile the darkest and
+ * brightest non-black texels are endpoints; index 3 marks transparent black. */
+static void
+fxt1_quantize_MIXED1 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 2; /* highest vector number in each microtile */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+    float b, iv[MAX_COMP]; /* interpolation vector */
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    int minSum;
+    int maxSum;
+    int minColL = 0, maxColL = -1; /* -1: no non-black texel seen yet */
+    int minColR = 0, maxColR = -1;
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	if (!ISTBLACK(input[k])) {
+	    int sum = 0;
+#ifndef YUV
+	    for (i = 0; i < n_comp; i++) {
+		sum += input[k][i];
+	    }
+#else
+            sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	    if (minSum > sum) {
+		minSum = sum;
+		minColL = k;
+	    }
+	    if (maxSum < sum) {
+		maxSum = sum;
+		maxColL = k;
+	    }
+	}
+    }
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	if (!ISTBLACK(input[k])) {
+	    int sum = 0;
+#ifndef YUV
+	    for (i = 0; i < n_comp; i++) {
+		sum += input[k][i];
+	    }
+#else
+            sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	    if (minSum > sum) {
+		minSum = sum;
+		minColR = k;
+	    }
+	    if (maxSum < sum) {
+		maxSum = sum;
+		maxColR = k;
+	    }
+	}
+    }
+
+    /* left microtile */
+    if (maxColL == -1) {
+	/* all transparent black */
+	cc[0] = ~0UL;
+	for (i = 0; i < n_comp; i++) {
+	    vec[0][i] = 0;
+	    vec[1][i] = 0;
+	}
+    } else {
+	cc[0] = 0;
+	for (i = 0; i < n_comp; i++) {
+	    vec[0][i] = input[minColL][i];
+	    vec[1][i] = input[maxColL][i];
+	}
+	/* NOTE(review): when min == max every index stays 0, black ones too */
+	if (minColL != maxColL) {
+	    /* compute interpolation vector */
+	    MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+	    /* add in texels */
+	    lolo = 0;
+	    for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+		int texel = n_vect + 1;	/* transparent black */
+		if (!ISTBLACK(input[k])) {
+		    /* interpolate color */
+		    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		}
+		/* add in texel */
+		lolo <<= 2;
+		lolo |= texel;
+	    }
+	    cc[0] = lolo;
+	}
+    }
+
+    /* right microtile */
+    if (maxColR == -1) {
+	/* all transparent black */
+	cc[1] = ~0UL;
+	for (i = 0; i < n_comp; i++) {
+	    vec[2][i] = 0;
+	    vec[3][i] = 0;
+	}
+    } else {
+	cc[1] = 0;
+	for (i = 0; i < n_comp; i++) {
+	    vec[2][i] = input[minColR][i];
+	    vec[3][i] = input[maxColR][i];
+	}
+	if (minColR != maxColR) {
+	    /* compute interpolation vector */
+	    MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
+
+	    /* add in texels */
+	    lohi = 0;
+	    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+		int texel = n_vect + 1;	/* transparent black */
+		if (!ISTBLACK(input[k])) {
+		    /* interpolate color */
+		    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+		}
+		/* add in texel */
+		lohi <<= 2;
+		lohi |= texel;
+	    }
+	    cc[1] = lohi;
+	}
+    }
+
+    Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
+    for (j = 2 * 2 - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, vec[j][i] >> 3);
+	}
+    }
+    ((qword *)cc)[1] = hi;
+}
+/* Encode in mixed mode, opaque: per microtile the endpoints are the texels with
+ * the lowest and highest value in that microtile's channel of largest variance. */
+static void
+fxt1_quantize_MIXED0 (dword *cc,
+		      byte input[N_TEXELS][MAX_COMP])
+{
+    const int n_vect = 3; /* highest vector number in each microtile */
+    const int n_comp = 3; /* 3 components: R, G, B */
+    byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
+    float b, iv[MAX_COMP]; /* interpolation vector */
+    int i, j, k;
+    qword hi; /* high quadword */
+    dword lohi, lolo; /* low quadword: hi dword, lo dword */
+
+    int minColL = 0, maxColL = 0;
+    int minColR = 0, maxColR = 0;
+#if 0
+    int minSum;
+    int maxSum;
+
+    /* Our solution here is to find the darkest and brightest colors in
+     * the 4x4 tile and use those as the two representative colors.
+     * There are probably better algorithms to use (histogram-based).
+     */
+#ifndef YUV
+    minSum = 2000; /* big enough */
+#else
+    minSum = 2000000;
+#endif
+    maxSum = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColL = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColL = k;
+	}
+    }
+    minSum = 2000; /* big enough */
+    maxSum = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	int sum = 0;
+#ifndef YUV
+	for (i = 0; i < n_comp; i++) {
+	    sum += input[k][i];
+	}
+#else
+        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] +  114 * input[k][BCOMP];
+#endif
+	if (minSum > sum) {
+	    minSum = sum;
+	    minColR = k;
+	}
+	if (maxSum < sum) {
+	    maxSum = sum;
+	    maxColR = k;
+	}
+    }
+#else
+    int minVal;
+    int maxVal;
+    int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
+    int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
+
+    /* Scan the channel with max variance for lo & hi
+     * and use those as the two representative colors.
+     */
+    minVal = 2000; /* big enough */
+    maxVal = -1; /* small enough */
+    for (k = 0; k < N_TEXELS / 2; k++) {
+	int t = input[k][maxVarL];
+	if (minVal > t) {
+	    minVal = t;
+	    minColL = k;
+	}
+	if (maxVal < t) {
+	    maxVal = t;
+	    maxColL = k;
+	}
+    }
+    minVal = 2000; /* big enough */
+    maxVal = -1; /* small enough */
+    for (; k < N_TEXELS; k++) {
+	int t = input[k][maxVarR];
+	if (minVal > t) {
+	    minVal = t;
+	    minColR = k;
+	}
+	if (maxVal < t) {
+	    maxVal = t;
+	    maxColR = k;
+	}
+    }
+#endif
+
+    /* left microtile */
+    cc[0] = 0;
+    for (i = 0; i < n_comp; i++) {
+	vec[0][i] = input[minColL][i];
+	vec[1][i] = input[maxColL][i];
+    }
+    if (minColL != maxColL) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
+
+	/* add in texels */
+	lolo = 0;
+	for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lolo <<= 2;
+	    lolo |= texel;
+	}
+
+	/* funky encoding for LSB of green: on mismatch swap ends, flip indices */
+	if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
+	    for (i = 0; i < n_comp; i++) {
+		vec[1][i] = input[minColL][i];
+		vec[0][i] = input[maxColL][i];
+	    }
+	    lolo = ~lolo;
+	}
+
+	cc[0] = lolo;
+    }
+
+    /* right microtile */
+    cc[1] = 0;
+    for (i = 0; i < n_comp; i++) {
+	vec[2][i] = input[minColR][i];
+	vec[3][i] = input[maxColR][i];
+    }
+    if (minColR != maxColR) {
+	/* compute interpolation vector */
+	MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
+
+	/* add in texels */
+	lohi = 0;
+	for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
+	    int texel;
+	    /* interpolate color */
+	    CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
+	    /* add in texel */
+	    lohi <<= 2;
+	    lohi |= texel;
+	}
+
+	/* funky encoding for LSB of green: on mismatch swap ends, flip indices */
+	if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
+	    for (i = 0; i < n_comp; i++) {
+		vec[3][i] = input[minColR][i];
+		vec[2][i] = input[maxColR][i];
+	    }
+	    lohi = ~lohi;
+	}
+
+	cc[1] = lohi;
+    }
+
+    Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
+    for (j = 2 * 2 - 1; j >= 0; j--) {
+	for (i = 0; i < n_comp; i++) {
+	    /* add in colors */
+	    Q_SHL(hi, 5);
+	    Q_OR32(hi, vec[j][i] >> 3);
+	}
+    }
+    ((qword *)cc)[1] = hi;
+}
+/* Gather one 8x4 block from the four row pointers (each lines[l] is advanced
+ * past the block) and hand it to the quantizer matching its alpha content. */
+static void
+fxt1_quantize (dword *cc, const byte *lines[], int comps)
+{
+    int trualpha;
+    byte reord[N_TEXELS][MAX_COMP];
+
+    byte input[N_TEXELS][MAX_COMP];
+#ifndef ARGB
+    int i;
+#endif
+    int k, l;
+
+    if (comps == 3) {
+	/* make the whole block opaque */
+	memset(input, -1, sizeof(input));
+    }
+
+    /* 8 texels each line */
+#ifndef ARGB
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4][i] = *lines[l]++;
+	    }
+	}
+	for (; k < 8; k++) {
+	    for (i = 0; i < comps; i++) {
+		input[k + l * 4 + 12][i] = *lines[l]++;
+	    }
+	}
+    }
+#else
+    /* H.Morii - support for ARGB inputs */
+    for (l = 0; l < 4; l++) {
+	for (k = 0; k < 4; k++) {
+          input[k + l * 4][2] = *lines[l]++;
+          input[k + l * 4][1] = *lines[l]++;
+          input[k + l * 4][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4][3] = *lines[l]++;
+	}
+	for (; k < 8; k++) {
+          input[k + l * 4 + 12][2] = *lines[l]++;
+          input[k + l * 4 + 12][1] = *lines[l]++;
+          input[k + l * 4 + 12][0] = *lines[l]++;
+          if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;
+	}
+    }
+#endif
+
+    /* block layout:
+     * 00, 01, 02, 03, 08, 09, 0a, 0b
+     * 10, 11, 12, 13, 18, 19, 1a, 1b
+     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
+     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
+     */
+
+    /* [dBorca]
+     * stupidity flows forth from this
+     */
+    l = N_TEXELS; /* l counts the texels that are not transparent black */
+    trualpha = 0;
+    if (comps == 4) {
+	/* skip all transparent black texels */
+	l = 0;
+	for (k = 0; k < N_TEXELS; k++) {
+	    /* test all components against 0 */
+	    if (!ISTBLACK(input[k])) {
+		/* texel is not transparent black */
+		COPY_4UBV(reord[l], input[k]);
+		if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
+		    /* non-opaque texel */
+		    trualpha = !0;
+		}
+		l++;
+	    }
+	}
+    }
+
+#if 0
+    if (trualpha) {
+	fxt1_quantize_ALPHA0(cc, input, reord, l);
+    } else if (l == 0) {
+	cc[0] = cc[1] = cc[2] = -1;
+	cc[3] = 0;
+    } else if (l < N_TEXELS) {
+	fxt1_quantize_HI(cc, input, reord, l);
+    } else {
+	fxt1_quantize_CHROMA(cc, input);
+    }
+    (void)fxt1_quantize_ALPHA1;
+    (void)fxt1_quantize_MIXED1;
+    (void)fxt1_quantize_MIXED0;
+#else
+    if (trualpha) {
+	fxt1_quantize_ALPHA1(cc, input);
+    } else if (l == 0) {
+	cc[0] = cc[1] = cc[2] = ~0UL; /* every texel is transparent black */
+	cc[3] = 0;
+    } else if (l < N_TEXELS) {
+	fxt1_quantize_MIXED1(cc, input);
+    } else {
+	fxt1_quantize_MIXED0(cc, input);
+    }
+    (void)fxt1_quantize_ALPHA0; /* referenced only to mark them as used */
+    (void)fxt1_quantize_HI;
+    (void)fxt1_quantize_CHROMA;
+#endif
+}
+/* Compress a width x height image of `comps' bytes per texel into FXT1 blocks.
+ * Returns 0 on success, -1 if the padded temporary copy cannot be allocated. */
+TAPI int TAPIENTRY
+fxt1_encode (int width, int height, int comps,
+	     const void *source, int srcRowStride,
+	     void *dest, int destRowStride)
+{
+    int x, y;
+    const byte *data;
+    dword *encoded = (dword *)dest;
+    void *newSource = NULL;
+
+    /* Replicate image if width is not M8 or height is not M4 */
+    if ((width & 7) | (height & 3)) {
+	int newWidth = (width + 7) & ~7;
+	int newHeight = (height + 3) & ~3;
+	newSource = malloc(comps * newWidth * newHeight * sizeof(byte));
+	if (newSource == NULL) {
+	    return -1; /* allocation failed; dest has not been touched */
+	}
+	_mesa_upscale_teximage2d(width, height, newWidth, newHeight,
+				 comps, (const byte *)source,
+				 srcRowStride, (byte *)newSource);
+	source = newSource;
+	width = newWidth;
+	height = newHeight;
+	srcRowStride = comps * newWidth;
+    }
+
+    data = (const byte *)source;
+    destRowStride = (destRowStride - width * 2) / 4; /* row padding, in dwords */
+    for (y = 0; y < height; y += 4) {
+	unsigned int offs = 0 + (y + 0) * srcRowStride;
+	for (x = 0; x < width; x += 8) {
+	    const byte *lines[4]; /* start of the block in four source rows */
+	    lines[0] = &data[offs];
+	    lines[1] = lines[0] + srcRowStride;
+	    lines[2] = lines[1] + srcRowStride;
+	    lines[3] = lines[2] + srcRowStride;
+	    offs += 8 * comps;
+	    fxt1_quantize(encoded, lines, comps);
+	    /* 128 bits per 8x4 block */
+	    encoded += 4;
+	}
+	encoded += destRowStride;
+    }
+
+    free(newSource); /* free(NULL) is a no-op when no copy was made */
+    return 0;
+}
+
+
+/***************************************************************************\
+ * FXT1 decoder
+ *
+ * The decoder is based on GL_3DFX_texture_compression_FXT1
+ * specification and serves as a concept for the encoder.
+\***************************************************************************/
+
+
+/* lookup table for scaling 5 bit colors up to 8 bits */
+static const byte _rgb_scale_5[] = {
+    0,   8,   16,  25,  33,  41,  49,  58,
+    66,  74,  82,  90,  99,  107, 115, 123,
+    132, 140, 148, 156, 165, 173, 181, 189,
+    197, 206, 214, 222, 230, 239, 247, 255
+};
+
+/* lookup table for scaling 6 bit colors up to 8 bits */
+static const byte _rgb_scale_6[] = {
+    0,   4,   8,   12,  16,  20,  24,  28,
+    32,  36,  40,  45,  49,  53,  57,  61,
+    65,  69,  73,  77,  81,  85,  89,  93,
+    97,  101, 105, 109, 113, 117, 121, 125,
+    130, 134, 138, 142, 146, 150, 154, 158,
+    162, 166, 170, 174, 178, 182, 186, 190,
+    194, 198, 202, 206, 210, 215, 219, 223,
+    227, 231, 235, 239, 243, 247, 251, 255
+};
+
+
+#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31)) /* bits from `which` up, unmasked; cannot cross a 32-bit boundary */
+#define UP5(c) _rgb_scale_5[(c) & 31] /* low 5 bits of c scaled to 8 bits */
+#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] /* 5 bits of c plus lsb b, scaled to 8 bits */
+#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)) /* rounded blend: t = 0 gives c0, t = n gives c1 */
+#define ZERO_4UBV(v) (*((dword *)(v)) = 0) /* clear four bytes with one dword store */
+
+
+static void
+fxt1_decode_1HI (const byte *code, int t, byte *rgba) /* decode texel t of a block that uses 3-bit indices */
+{
+    const dword *cc;
+
+    t *= 3; /* bit offset of this texel's 3-bit index */
+    cc = (const dword *)(code + t / 8); /* byte-granular, possibly unaligned dword read */
+    t = (cc[0] >> (t & 7)) & 7;
+
+    if (t == 7) {
+	ZERO_4UBV(rgba); /* index 7: all four components are zero */
+    } else {
+	cc = (const dword *)(code + 12); /* fourth dword holds the two 15-bit colors */
+	if (t == 0) {
+	    rgba[BCOMP] = UP5(CC_SEL(cc, 0));
+	    rgba[GCOMP] = UP5(CC_SEL(cc, 5));
+	    rgba[RCOMP] = UP5(CC_SEL(cc, 10));
+	} else if (t == 6) {
+	    rgba[BCOMP] = UP5(CC_SEL(cc, 15));
+	    rgba[GCOMP] = UP5(CC_SEL(cc, 20));
+	    rgba[RCOMP] = UP5(CC_SEL(cc, 25));
+	} else {
+	    rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15))); /* indices 1..5 blend the two colors in sixths */
+	    rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
+	    rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
+	}
+	rgba[ACOMP] = 255;
+    }
+}
+
+
+static void
+fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba) /* decode texel t by looking up one of four stored colors */
+{
+    const dword *cc;
+    dword kk;
+
+    cc = (const dword *)code;
+    if (t & 16) { /* texels 16..31 take their indices from the second dword */
+	cc++;
+	t &= 15;
+    }
+    t = (cc[0] >> (t * 2)) & 3; /* 2-bit color index */
+
+    t *= 15; /* bit offset of the selected 15-bit color, counted from byte 8 */
+    cc = (const dword *)(code + 8 + t / 8); /* possibly unaligned dword read */
+    kk = cc[0] >> (t & 7);
+    rgba[BCOMP] = UP5(kk);
+    rgba[GCOMP] = UP5(kk >> 5);
+    rgba[RCOMP] = UP5(kk >> 10);
+    rgba[ACOMP] = 255; /* this path always yields an opaque texel */
+}
+
+
+static void
+fxt1_decode_1MIXED (const byte *code, int t, byte *rgba) /* decode texel t from a block with one color pair per half */
+{
+    const dword *cc;
+    int col[2][3]; /* the two colors of the half containing t; values are masked later by UP5/UP6 */
+    int glsb, selb;
+
+    cc = (const dword *)code;
+    if (t & 16) { /* texels 16..31: indices in cc[1], colors 2 and 3 */
+	t &= 15;
+	t = (cc[1] >> (t * 2)) & 3;
+	/* col 2 */
+	col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6; /* field starts at bit 94 and crosses a dword boundary, so CC_SEL cannot fetch it */
+	col[0][GCOMP] = CC_SEL(cc, 99);
+	col[0][RCOMP] = CC_SEL(cc, 104);
+	/* col 3 */
+	col[1][BCOMP] = CC_SEL(cc, 109);
+	col[1][GCOMP] = CC_SEL(cc, 114);
+	col[1][RCOMP] = CC_SEL(cc, 119);
+	glsb = CC_SEL(cc, 126);
+	selb = CC_SEL(cc, 33);
+    } else { /* texels 0..15: indices in cc[0], colors 0 and 1 */
+	t = (cc[0] >> (t * 2)) & 3;
+	/* col 0 */
+	col[0][BCOMP] = CC_SEL(cc, 64);
+	col[0][GCOMP] = CC_SEL(cc, 69);
+	col[0][RCOMP] = CC_SEL(cc, 74);
+	/* col 1 */
+	col[1][BCOMP] = CC_SEL(cc, 79);
+	col[1][GCOMP] = CC_SEL(cc, 84);
+	col[1][RCOMP] = CC_SEL(cc, 89);
+	glsb = CC_SEL(cc, 125);
+	selb = CC_SEL(cc, 1);
+    }
+
+    if (CC_SEL(cc, 124) & 1) {
+	/* alpha[0] == 1 */
+
+	if (t == 3) {
+	    ZERO_4UBV(rgba); /* index 3: all four components are zero */
+	} else {
+	    if (t == 0) {
+		rgba[BCOMP] = UP5(col[0][BCOMP]);
+		rgba[GCOMP] = UP5(col[0][GCOMP]);
+		rgba[RCOMP] = UP5(col[0][RCOMP]);
+	    } else if (t == 2) {
+		rgba[BCOMP] = UP5(col[1][BCOMP]);
+		rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
+		rgba[RCOMP] = UP5(col[1][RCOMP]);
+	    } else { /* t == 1: average of the two colors */
+		rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
+		rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
+		rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
+	    }
+	    rgba[ACOMP] = 255;
+	}
+    } else {
+	/* alpha[0] == 0 */
+
+	if (t == 0) {
+	    rgba[BCOMP] = UP5(col[0][BCOMP]);
+	    rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb); /* first color's green lsb is glsb XOR selb */
+	    rgba[RCOMP] = UP5(col[0][RCOMP]);
+	} else if (t == 3) {
+	    rgba[BCOMP] = UP5(col[1][BCOMP]);
+	    rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
+	    rgba[RCOMP] = UP5(col[1][RCOMP]);
+	} else { /* t == 1 or 2: blend the two colors in thirds */
+	    rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
+	    rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
+				     UP6(col[1][GCOMP], glsb));
+	    rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
+	}
+	rgba[ACOMP] = 255;
+    }
+}
+
+
+static void
+fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba) /* decode texel t from a block whose colors carry 5-bit alpha */
+{
+    const dword *cc;
+
+    cc = (const dword *)code;
+    if (CC_SEL(cc, 124) & 1) {
+	/* lerp == 1 */
+	int col0[4]; /* the half's own color; every texel blends it towards color 1 */
+
+	if (t & 16) { /* texels 16..31: indices in cc[1] */
+	    t &= 15;
+	    t = (cc[1] >> (t * 2)) & 3;
+	    /* col 2 */
+	    col0[BCOMP] = (*(const dword *)(code + 11)) >> 6; /* field starts at bit 94 and crosses a dword boundary */
+	    col0[GCOMP] = CC_SEL(cc, 99);
+	    col0[RCOMP] = CC_SEL(cc, 104);
+	    col0[ACOMP] = CC_SEL(cc, 119);
+	} else { /* texels 0..15: indices in cc[0] */
+	    t = (cc[0] >> (t * 2)) & 3;
+	    /* col 0 */
+	    col0[BCOMP] = CC_SEL(cc, 64);
+	    col0[GCOMP] = CC_SEL(cc, 69);
+	    col0[RCOMP] = CC_SEL(cc, 74);
+	    col0[ACOMP] = CC_SEL(cc, 109);
+	}
+
+	if (t == 0) {
+	    rgba[BCOMP] = UP5(col0[BCOMP]);
+	    rgba[GCOMP] = UP5(col0[GCOMP]);
+	    rgba[RCOMP] = UP5(col0[RCOMP]);
+	    rgba[ACOMP] = UP5(col0[ACOMP]);
+	} else if (t == 3) { /* color 1 (bits 79..93, alpha at bit 114), used by both halves */
+	    rgba[BCOMP] = UP5(CC_SEL(cc, 79));
+	    rgba[GCOMP] = UP5(CC_SEL(cc, 84));
+	    rgba[RCOMP] = UP5(CC_SEL(cc, 89));
+	    rgba[ACOMP] = UP5(CC_SEL(cc, 114));
+	} else { /* t == 1 or 2: blend in thirds, alpha included */
+	    rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
+	    rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
+	    rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
+	    rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
+	}
+    } else {
+	/* lerp == 0 */
+
+	if (t & 16) { /* texels 16..31 take their indices from the second dword */
+	    cc++;
+	    t &= 15;
+	}
+	t = (cc[0] >> (t * 2)) & 3;
+
+	if (t == 3) {
+	    ZERO_4UBV(rgba); /* index 3: all four components are zero */
+	} else {
+	    dword kk;
+	    cc = (const dword *)code;
+	    rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13)); /* 5-bit alpha of color t, at bit 109 + 5 * t */
+	    t *= 15; /* bit offset of the 15-bit color, counted from byte 8 */
+	    cc = (const dword *)(code + 8 + t / 8); /* possibly unaligned dword read */
+	    kk = cc[0] >> (t & 7);
+	    rgba[BCOMP] = UP5(kk);
+	    rgba[GCOMP] = UP5(kk >> 5);
+	    rgba[RCOMP] = UP5(kk >> 10);
+	}
+    }
+}
+
+
+TAPI void TAPIENTRY
+fxt1_decode_1 (const void *texture, int stride, /* decode the single texel at column i, row j into rgba[4] */
+	       int i, int j, byte *rgba)
+{
+    static void (*decode_1[]) (const byte *, int, byte *) = { /* indexed by the block's top three bits */
+	fxt1_decode_1HI,	/* cc-high   = "00?" */
+	fxt1_decode_1HI,	/* cc-high   = "00?" */
+	fxt1_decode_1CHROMA,	/* cc-chroma = "010" */
+	fxt1_decode_1ALPHA,	/* alpha     = "011" */
+	fxt1_decode_1MIXED,	/* mixed     = "1??" */
+	fxt1_decode_1MIXED,	/* mixed     = "1??" */
+	fxt1_decode_1MIXED,	/* mixed     = "1??" */
+	fxt1_decode_1MIXED	/* mixed     = "1??" */
+    };
+
+    const byte *code = (const byte *)texture + /* 16-byte block covering the 8x4 tile that contains (i, j) */
+			((j / 4) * (stride / 8) + (i / 8)) * 16;
+    int mode = CC_SEL(code, 125); /* bits 125..127 of the block */
+    int t = i & 7; /* column inside the block */
+
+    if (t & 4) { /* columns 4..7 belong to the right half, numbered 16..31 */
+	t += 12;
+    }
+    t += (j & 3) * 4; /* four texels per row within each half */
+
+    decode_1[mode](code, t, rgba);
+
+#if VERBOSE
+    { /* per-mode usage counters; the variables are defined in another translation unit */
+	extern int cc_chroma;
+	extern int cc_alpha;
+	extern int cc_high;
+	extern int cc_mixed;
+	static int *cctype[] = {
+	    &cc_high,
+	    &cc_high,
+	    &cc_chroma,
+	    &cc_alpha,
+	    &cc_mixed,
+	    &cc_mixed,
+	    &cc_mixed,
+	    &cc_mixed
+	};
+	(*cctype[mode])++;
+    }
+#endif
+}
diff --git a/GLideNHQ/tc-1.1+/fxt1.h b/GLideNHQ/tc-1.1+/fxt1.h
new file mode 100644
index 00000000..c2919bba
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/fxt1.h
@@ -0,0 +1,38 @@
+/*
+ * FXT1 codec
+ * Version:  1.1
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef FXT1_H_included
+#define FXT1_H_included
+
+TAPI int TAPIENTRY /* TAPI, TAPIENTRY and byte are not defined here; include internal.h and types.h first */
+fxt1_encode (int width, int height, int comps,
+             const void *source, int srcRowStride,
+             void *dest, int destRowStride);
+
+TAPI void TAPIENTRY /* writes the four components of texel (i, j) to rgba */
+fxt1_decode_1 (const void *texture, int stride /* in pixels */,
+	       int i, int j, byte *rgba);
+
+#endif
diff --git a/GLideNHQ/tc-1.1+/fxt1license.txt b/GLideNHQ/tc-1.1+/fxt1license.txt
new file mode 100644
index 00000000..234c6c6b
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/fxt1license.txt
@@ -0,0 +1,244 @@
+3DFX FXT1 Source Code General Public License
+
+
+1. PREAMBLE
+
+	This license is for software that provides texture compression and 
+	decompression, particularly in the context of video games. The license
+	is intended to offer terms similar to some standard General Public 
+	Licenses designed to foster open standards and unrestricted 
+	accessibility to source code. Some of these licenses require that, as
+	a condition of the license of the software, any derivative works 
+	(that is, new software which is a work containing the original program
+	or a portion of it) must be available for general use, without
+	restriction other than for a minor transfer fee, and that the source
+	code for such derivative works must likewise be made available. The 
+	only restriction is that such derivative works must be subject to 
+	the same General Public License terms as the original work. 
+
+	This 3dfx FXT1 Source Code General Public License differs from the
+	standard licenses of this type in that it does not require the entire
+	derivative work to be made available under the terms of this license
+	nor is the recipient required to make available the source code for
+	the entire derivative work. Rather, the license is limited to only the
+	identifiable portion of the derivative work that is derived from the
+	licensed software. The precise terms and conditions for copying, 
+	distribution and modification follow.
+
+
+2. DEFINITIONS
+
+	2.1 This License applies to any program (or other "work") which 
+	contains a notice placed by the copyright holder saying it may be 
+	distributed under the terms of this 3dfx FXT1 Source Code General 
+	Public License. 
+
+	2.2 The term "Program" as used in this Agreement refers to 3DFX's 
+	FXT1 source code and object code and any Derivative Work.
+
+	2.3 "Derivative Work" means, for the purpose of the License, that 
+	portion of any work that contains the Program or the identifiable 
+	portion of a work that is derived from the Program, either verbatim or
+	with modifications and/or translated into another language, and that 
+	performs texture compression and decompression. It does not include 
+	any other portions of a work.
+
+	2.4 "Modifications of the Program" means any work, which includes a
+	Derivative Work, and includes the whole of such work.
+
+	2.5 "License" means this 3dfx FXT1 Source Code General Public License.
+
+	2.6 The "Source Code" for a work means the preferred form of the work
+	for making modifications to it. For an executable work, complete source
+	code means all the source code for all modules it contains, any
+	associated interface definition files, and the scripts used to control
+	compilation and installation of the executable work.
+
+	2.7 "3dfx" means 3dfx Interactive, Inc.
+
+
+3. LICENSED ACTIVITIES
+
+	3.1 COPYING - You may copy and distribute verbatim copies of the 
+	Program's Source Code as you receive it, in any medium, subject to the
+	provision of section 3.3 and provided also that:
+
+		(a) you conspicuously and appropriately publish on each copy
+	an appropriate copyright notice (3dfx Interactive, Inc. 1999), a notice
+	that recipients who wish to copy, distribute or modify the Program can
+	only do so subject to this License, and a disclaimer of warranty as
+	set forth in section 5;
+
+		(b) keep intact all the notices that refer to this License and
+	to the absence of any warranty; and
+ 
+		(c) give all recipients of the Program a copy of this License
+	along with the Program or instructions on how to easily receive a copy
+	of this License.
+
+
+	3.2 MODIFICATION OF THE PROGRAM/DERIVATIVE WORKS - You may modify your
+	copy or copies of the Program or any portion of it, and copy and
+	distribute such modifications subject to the provisions of section 3.3
+	and provided that you also meet all of the following conditions: 
+
+		(a) you conspicuously and appropriately publish on each copy
+	of a Derivative Work an appropriate copyright notice, a notice that
+	recipients who wish to copy, distribute or modify the Derivative Work
+	can only do so subject to this License, and a disclaimer of warranty
+	as set forth in section 5;
+
+		(b) keep intact all the notices that refer to this License and
+	to the absence of any warranty; and
+ 
+		(c) give all recipients of the Derivative Work a copy of this
+	License along with the Derivative Work or instructions on how to easily
+	receive a copy of this License.
+
+		(d) You must cause the modified files of the Derivative Work
+	to carry prominent notices stating that you changed the files and the
+	date of any change. 
+
+		(e) You must cause any Derivative Work that you distribute or
+	publish to be licensed at no charge to all third parties under the
+	terms of this License. 
+
+		(f) If the Derivative Work normally reads commands 
+	interactively when run, you must cause it, when started running for
+	such interactive use, to print or display an announcement as follows:
+
+	"COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED THIS 
+	SOFTWARE IS FREE AND PROVIDED "AS IS," WITHOUT WARRANTY OF ANY KIND, 
+	EITHER EXPRESSED OR IMPLIED. SEE THE 3DFX FXT1 GENERAL PUBLIC LICENSE
+	FOR A FULL TEXT OF THE DISTRIBUTION AND NON-WARRANTY PROVISIONS 
+	(REQUEST COPY FROM INFO@3DFX.COM)."
+
+		(g) The requirements of this section 3.2 do not apply to the
+	modified work as a whole but only to the Derivative Work. It is not
+	the intent of this License to claim rights or contest your rights to
+	work written entirely by you; rather, the intent is to exercise the
+	right to control the distribution of Derivative Works. 
+
+
+	3.3 DISTRIBUTION 
+
+		(a) All copies of the Program or Derivative Works which are
+	distributed must include in the file headers the following language
+	verbatim:
+
+	"THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED 
+	ONLY PURSUANT TO THE 3DFX FXT1 GENERAL PUBLIC LICENSE. A COPY OF THIS
+	LICENSE MAY BE OBTAINED FROM THE DISTRIBUTOR OR BY CONTACTING 3DFX
+	INTERACTIVE INC (info@3dfx.com). THIS PROGRAM. IS PROVIDED "AS IS"
+	WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. SEE THE
+	3DFX FXT1 GENERAL PUBLIC LICENSE FOR A FULL TEXT OF THE NON-WARRANTY
+	PROVISIONS.
+
+	USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
+	RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS
+	IN TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
+	AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
+	SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
+	THE UNITED STATES.
+
+	COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED"
+
+		(b) You may distribute the Program or a Derivative Work in
+	object code or executable form under the terms of Sections 3.1 and 3.2
+	provided that you also do one of the following: 
+
+			(1) Accompany it with the complete corresponding
+	machine-readable source code, which must be distributed under the 
+	terms of Sections 3.1 and 3.2; or,
+ 
+			(2) Accompany it with a written offer, valid for at
+	least three years, to give any third party, for a charge no more than
+	your cost of physically performing source distribution, a complete
+	machine-readable copy of the corresponding source code, to be 
+	distributed under the terms of Sections 3.1 and 3.2 on a medium 
+	customarily used for software interchange; or,
+ 
+			(3) Accompany it with the information you received as
+	to the offer to distribute corresponding source code. (This alternative
+	is allowed only for noncommercial distribution and only if you received
+	the program in object code or executable form with such an offer, in
+	accord with Subsection 3.3(b)(2) above.)
+ 
+		(c) The source code distributed need not include anything
+	that is normally distributed (in either source or binary form) with
+	the major components (compiler, kernel, and so on) of the operating
+	system on which the executable runs, unless that component itself
+	accompanies the executable code.
+ 
+		(d) If distribution of executable code or object code is made
+	by offering access to copy from a designated place, then offering
+	equivalent access to copy the source code from the same place counts
+	as distribution of the source code, even though third parties are not
+	compelled to copy the source along with the object code. 
+
+		(e) Each time you redistribute the Program or any Derivative
+	Work, the recipient automatically receives a license from 3dfx and
+	successor licensors to copy, distribute or modify the Program and
+	Derivative Works subject to the terms and conditions of the License.
+	You may not impose any further restrictions on the recipients' 
+	exercise of the rights granted herein. You are not responsible for
+	enforcing compliance by third parties to this License.
+ 
+		(f) You may not copy, modify, sublicense, or distribute the
+	Program or any Derivative Works except as expressly provided under
+	this License. Any attempt otherwise to copy, modify, sublicense or
+	distribute the Program or any Derivative Works is void, and will
+	automatically terminate your rights under this License. However,
+	parties who have received copies, or rights, from you under this
+	License will not have their licenses terminated so long as such
+	parties remain in full compliance.
+
+ 
+4. MISCELLANEOUS
+
+	4.1 Acceptance of this License is voluntary. By using, modifying or
+	distributing the Program or any Derivative Work, you indicate your 
+	acceptance of this License to do so, and all its terms and conditions
+	for copying, distributing or modifying the Program or works based on
+	it. Nothing else grants you permission to modify or distribute the
+	Program or Derivative Works and doing so without acceptance of this
+	License is in violation of the U.S. and international copyright laws.
+
+	4.2 If the distribution and/or use of the Program or Derivative Works
+	is restricted in certain countries either by patents or by copyrighted
+	interfaces, the original copyright holder who places the Program under
+	this License may add an explicit geographical distribution limitation
+	excluding those countries, so that distribution is permitted only in
+	or among countries not thus excluded. In such case, this License
+	incorporates the limitation as if written in the body of this License.
+
+	4.3 This License is to be construed according to the laws of the 
+	State of California and you consent to personal jurisdiction in the
+	State of California in the event it is necessary to enforce the
+	provisions of this License.
+
+
+5. NO WARRANTIES
+
+	5.1 TO THE EXTENT PERMITTED BY APPLICABLE LAW, THERE IS NO WARRANTY
+	FOR THE PROGRAM. OR DERIVATIVE WORKS THE COPYRIGHT HOLDERS AND/OR
+	OTHER PARTIES PROVIDE THE PROGRAM AND ANY DERIVATIVE WORKS"AS IS"
+	WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+	BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+	FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
+	AND PERFORMANCE OF THE PROGRAM AND ANY DERIVATIVE WORK IS WITH YOU.
+	SHOULD THE PROGRAM OR ANY DERIVATIVE WORK PROVE DEFECTIVE, YOU ASSUME
+	THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+	5.2 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL 3DFX
+	INTERACTIVE, INC., OR ANY OTHER COPYRIGHT HOLDER, OR ANY OTHER PARTY
+	WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM OR DERIVATIVE WORKS AS
+	PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL,
+	SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
+	INABILITY TO USE THE PROGRAM OR DERIVATIVE WORKS (INCLUDING BUT NOT
+	LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES
+	SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM OR
+	DERIVATIVE WORKS TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH
+	HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+	DAMAGES.
+
diff --git a/GLideNHQ/tc-1.1+/internal.h b/GLideNHQ/tc-1.1+/internal.h
new file mode 100644
index 00000000..f1cd6dca
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/internal.h
@@ -0,0 +1,137 @@
+/*
+ * Texture compression
+ * Version:  1.0
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+
+#ifndef INTERNAL_H_included
+#define INTERNAL_H_included
+
+/*****************************************************************************\
+ * DLL stuff
+\*****************************************************************************/
+
+#ifdef __WIN32__
+#define TAPI __declspec(dllexport)
+#define TAPIENTRY /*__stdcall*/
+#else
+#define TAPI
+#define TAPIENTRY
+#endif
+
+
+/*****************************************************************************\
+ * 64bit types on 32bit machine
+\*****************************************************************************/
+
+#if (defined(__GNUC__) && !defined(__cplusplus)) || defined(__MSC__)
+
+typedef unsigned long long qword;
+
+#define Q_MOV32(a, b) a = b
+#define Q_OR32(a, b)  a |= b
+#define Q_SHL(a, c)   a <<= c
+
+#else  /* !__GNUC__ */
+
+typedef struct {
+    dword lo, hi; /* low and high halves; Q_SHL below is only well defined for 0 < c < 64 */
+} qword;
+
+#define Q_MOV32(a, b) a.lo = b
+#define Q_OR32(a, b)  a.lo |= b
+#define Q_SHL(a, c)					\
+    do {						\
+	if ((c) >= 32) {				\
+	    a.hi = a.lo << ((c) - 32);			\
+	    a.lo = 0;					\
+	} else {					\
+	    a.hi = (a.hi << (c)) | (a.lo >> (32 - (c)));\
+	    a.lo <<= c;					\
+	}						\
+    } while (0)
+
+#endif /* !__GNUC__ */
+
+
+/*****************************************************************************\
+ * Config
+\*****************************************************************************/
+
+#define RCOMP 0
+#define GCOMP 1
+#define BCOMP 2
+#define ACOMP 3
+
+/*****************************************************************************\
+ * Metric
+\*****************************************************************************/
+
+#define F(i) (float)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
+#define SAFECDOT 1 /* for paranoids */
+
+#define MAKEIVEC(NV, NC, IV, B, V0, V1)	\
+    do {				\
+	/* compute interpolation vector IV and bias B; uses the caller's loop variable i and divides by d2, which is zero when V0 equals V1 */\
+	float d2 = 0.0F;		\
+	float rd2;			\
+					\
+	for (i = 0; i < NC; i++) {	\
+	    IV[i] = (V1[i] - V0[i]) * F(i);\
+	    d2 += IV[i] * IV[i];	\
+	}				\
+	rd2 = (float)NV / d2;		\
+	B = 0;				\
+	for (i = 0; i < NC; i++) {	\
+	    IV[i] *= F(i);		\
+	    B -= IV[i] * V0[i];		\
+	    IV[i] *= rd2;		\
+	}				\
+	B = B * rd2 + 0.5F;		\
+    } while (0)
+
+#define CALCCDOT(TEXEL, NV, NC, IV, B, V) /* TEXEL = (int)(dot(V, IV) + B), clamped to 0..NV when SAFECDOT; uses the caller's i */\
+    do {				\
+	float dot = 0.0F;		\
+	for (i = 0; i < NC; i++) {	\
+	    dot += V[i] * IV[i];	\
+	}				\
+	TEXEL = (int)(dot + B);		\
+	if (SAFECDOT) {			\
+	    if (TEXEL < 0) {		\
+		TEXEL = 0;		\
+	    } else if (TEXEL > NV) {	\
+		TEXEL = NV;		\
+	    }				\
+	}				\
+    } while (0)
+
+
+/*****************************************************************************\
+ * Utility functions
+\*****************************************************************************/
+
+void
+_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
+			  unsigned int outWidth, unsigned int outHeight,
+			  unsigned int comps,
+			  const byte *src, int srcRowStride,
+			  unsigned char *dest);
+
+#endif
diff --git a/GLideNHQ/tc-1.1+/texstore.c b/GLideNHQ/tc-1.1+/texstore.c
new file mode 100644
index 00000000..2eb0306f
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/texstore.c
@@ -0,0 +1,93 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Copyright (C) 2007  Hiroshi Morii <koolsmoky(at)users.sourceforge.net>
+ * _mesa_upscale_teximage2d speedup
+ */
+
+#include <assert.h>
+
+#include "types.h"
+#include "internal.h"
+
+
+/*
+ * Copy an inWidth x inHeight image (comps bytes per texel, rows srcRowStride
+ * bytes apart) into the tightly packed outWidth x outHeight image at dest,
+ * filling the extra columns and rows by repeating (tiling) the source.
+ *
+ * The padding wraps with a modulo, so an output two or more times larger
+ * than the input (e.g. 1x1 padded to 8x4) never reads outside src.
+ */
+void
+_mesa_upscale_teximage2d (unsigned int inWidth, unsigned int inHeight,
+			  unsigned int outWidth, unsigned int outHeight,
+			  unsigned int comps,
+			  const byte *src, int srcRowStride,
+			  byte *dest)
+{
+    unsigned int i, j, k;
+
+    assert(outWidth >= inWidth);
+    assert(outHeight >= inHeight);
+
+    /* nothing to tile from; also keeps the modulo below well defined */
+    if (inWidth == 0 || inHeight == 0) {
+	return;
+    }
+
+  /* rows that exist in the source */
+  for (i = 0; i < inHeight; i++) {
+    for (j = 0; j < inWidth; j++) {
+      const int aa = (i * outWidth + j) * comps;
+      const int bb = i * srcRowStride + j * comps;
+      for (k = 0; k < comps; k++) {
+        dest[aa + k] = src[bb + k];
+      }
+    }
+    for (; j < outWidth; j++) {	/* padding columns */
+      const int aa = (i * outWidth + j) * comps;
+      const int bb = i * srcRowStride + (j % inWidth) * comps;
+      for (k = 0; k < comps; k++) {
+        dest[aa + k] = src[bb + k];
+      }
+    }
+  }
+  for (; i < outHeight; i++) {	/* padding rows */
+    for (j = 0; j < inWidth; j++) {
+      const int aa = (i * outWidth + j) * comps;
+      const int bb = (i % inHeight) * srcRowStride + j * comps;
+      for (k = 0; k < comps; k++) {
+        dest[aa + k] = src[bb + k];
+      }
+    }
+    for (; j < outWidth; j++) {
+      const int aa = (i * outWidth + j) * comps;
+      const int bb = (i % inHeight) * srcRowStride + (j % inWidth) * comps;
+      for (k = 0; k < comps; k++) {
+        dest[aa + k] = src[bb + k];
+      }
+    }
+  }
+}
diff --git a/GLideNHQ/tc-1.1+/types.h b/GLideNHQ/tc-1.1+/types.h
new file mode 100644
index 00000000..40e7153a
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/types.h
@@ -0,0 +1,11 @@
+#ifndef TYPES_H_included
+#define TYPES_H_included
+
+/*****************************************************************************\
+ * 32bit types
+\*****************************************************************************/
+typedef unsigned char byte;	/*  8-bit */
+typedef unsigned short word;	/* 16-bit */
+typedef unsigned int dword;	/* 32-bit */
+
+#endif
diff --git a/GLideNHQ/tc-1.1+/wrapper.c b/GLideNHQ/tc-1.1+/wrapper.c
new file mode 100644
index 00000000..7bc8d610
--- /dev/null
+++ b/GLideNHQ/tc-1.1+/wrapper.c
@@ -0,0 +1,110 @@
+/*
+ * Texture compression
+ * Version:  1.0
+ *
+ * Copyright (C) 2004  Daniel Borca   All Rights Reserved.
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.	
+ */
+
+
+#include <assert.h>
+
+#include "types.h"
+#include "internal.h"
+#include "dxtn.h"
+
+
+#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT   0x83F0
+#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT  0x83F1
+#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT  0x83F2
+#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT  0x83F3
+
+
+TAPI void TAPIENTRY
+fetch_2d_texel_rgb_dxt1 (int texImage_RowStride,
+			 const byte *texImage_Data,
+			 int i, int j,
+			 byte *texel)
+{
+    dxt1_rgb_decode_1(texImage_Data, texImage_RowStride, i, j, texel); /* plain forwarder; only the order of data and stride differs */
+}
+
+
+TAPI void TAPIENTRY
+fetch_2d_texel_rgba_dxt1 (int texImage_RowStride,
+			  const byte *texImage_Data,
+			  int i, int j,
+			  byte *texel)
+{
+    dxt1_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel); /* plain forwarder; only the order of data and stride differs */
+}
+
+
+TAPI void TAPIENTRY
+fetch_2d_texel_rgba_dxt3 (int texImage_RowStride,
+			  const byte *texImage_Data,
+			  int i, int j,
+			  byte *texel)
+{
+    dxt3_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel); /* plain forwarder; only the order of data and stride differs */
+}
+
+
+TAPI void TAPIENTRY
+fetch_2d_texel_rgba_dxt5 (int texImage_RowStride,
+			  const byte *texImage_Data,
+			  int i, int j,
+			  byte *texel)
+{
+    dxt5_rgba_decode_1(texImage_Data, texImage_RowStride, i, j, texel); /* plain forwarder; only the order of data and stride differs */
+}
+
+
+TAPI void TAPIENTRY
+tx_compress_dxtn (int srccomps, int width, int height, /* dispatch to the DXT1/3/5 encoder selected by destformat */
+		  const byte *source, int destformat, byte *dest,
+		  int destRowStride)
+{
+    int srcRowStride = width * srccomps; /* source rows are taken to be tightly packed */
+    int rv; /* encoder result; assigned but never returned or checked */
+
+    switch (destformat) {
+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+	    rv = dxt1_rgb_encode(width, height, srccomps,
+				 source, srcRowStride,
+				 dest, destRowStride);
+	    break;
+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+	    rv = dxt1_rgba_encode(width, height, srccomps,
+				  source, srcRowStride,
+				  dest, destRowStride);
+	    break;
+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+	    rv = dxt3_rgba_encode(width, height, srccomps,
+				  source, srcRowStride,
+				  dest, destRowStride);
+	    break;
+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+	    rv = dxt5_rgba_encode(width, height, srccomps,
+				  source, srcRowStride,
+				  dest, destRowStride);
+	    break;
+	default:
+	    assert(0); /* unknown format: aborts when asserts are enabled, otherwise dest is left untouched */
+    }
+
+    /*return rv;*/
+}
diff --git a/GLideNHQ/test/Makefile.gcc b/GLideNHQ/test/Makefile.gcc
new file mode 100644
index 00000000..e76af4bf
--- /dev/null
+++ b/GLideNHQ/test/Makefile.gcc
@@ -0,0 +1,70 @@
+# This MUST be processed by GNU make
+#
+# Texture Filtering Test Linux Makefile
+# Version:  1.0
+#
+# Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+# Email koolsmoky(at)users.sourceforge.net
+# Web   http://www.3dfxzone.it/koolsmoky
+#
+# this is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# this is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Make; see the file COPYING.  If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+#
+#  Available options:
+#
+#    Environment variables:
+#
+#    Targets:
+#	all:		build test.exe
+#	clean:		remove object files
+#	realclean:	remove all generated files
+#
+
+#
+# GCC does not have SEH (structured exception handling)
+#
+
+.PHONY: all clean realclean
+# Builds test.exe from the files listed in SOURCES, using g++ to compile and link.
+CC = g++
+CFLAGS += -I. -I../
+CFLAGS += -fPIC -DPIC
+CFLAGS += -DGHQCHK=1
+
+LD = g++
+LDFLAGS += -ldl -lstdc++
+
+RM = rm -f
+
+SOURCES = \
+	test.cpp \
+	../Ext_TxFilter.cpp
+
+OBJECTS = $(SOURCES:.cpp=.o)
+
+.cpp.o:
+	$(CC) -o $@ $(CFLAGS) -c $<
+
+all: test.exe
+# LDFLAGS carries the -l libraries, so it goes after the objects that need them.
+test.exe: $(OBJECTS)
+	$(LD) -o $@ $^ $(LDFLAGS)
+# OBJECTS is removed too: ../Ext_TxFilter.o lies outside this directory.
+clean:
+	-$(RM) *.o $(OBJECTS)
+
+realclean: clean
+	-$(RM) test.exe
diff --git a/GLideNHQ/test/Makefile.vc8 b/GLideNHQ/test/Makefile.vc8
new file mode 100644
index 00000000..007f5389
--- /dev/null
+++ b/GLideNHQ/test/Makefile.vc8
@@ -0,0 +1,68 @@
+# This MUST be processed by GNU make
+#
+# Texture Filtering Test MSVC Makefile
+# Version:  1.0
+#
+# Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+# Email koolsmoky(at)users.sourceforge.net
+# Web   http://www.3dfxzone.it/koolsmoky
+#
+# this is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# this is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Make; see the file COPYING.  If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+
+#
+#  Available options:
+#
+#    Targets:
+#	all:		build everything
+#	clean:		remove object files
+#       realclean:      remove all generated files
+#
+
+.PHONY: all clean realclean
+.SUFFIXES: .cpp .obj
+# MSVC toolchain; change LD to suit your build environment.
+CC = cl
+LD = _link
+
+UNLINK = $(RM) $(1)
+
+CFLAGS += -D__MSC__ -DWIN32 -D_CONSOLE -EHa -D_CRT_SECURE_NO_DEPRECATE
+CFLAGS += -I. -I../
+CFLAGS += -DGHQCHK=1
+
+#LDFLAGS += -ltcg:STATUS
+
+SOURCES = \
+	test.cpp \
+	../Ext_TxFilter.cpp
+
+OBJECTS = $(SOURCES:.cpp=.obj)
+
+.cpp.obj:
+	$(CC) -Fo$@ $(CFLAGS) -c $<
+
+all: test.exe
+
+test.exe: $(OBJECTS)
+	$(LD) -out:$@ $(LDFLAGS) $(OBJECTS)
+
+$(OBJECTS): $(SOURCES)
+# OBJECTS is removed too: ../Ext_TxFilter.obj lies outside this directory.
+clean:
+	-$(RM) *.obj *.pdb *.ilk $(OBJECTS)
+
+realclean: clean
+	-$(RM) test.exe
diff --git a/GLideNHQ/test/test.cpp b/GLideNHQ/test/test.cpp
new file mode 100644
index 00000000..6c83972b
--- /dev/null
+++ b/GLideNHQ/test/test.cpp
@@ -0,0 +1,120 @@
+/*
+ * Texture Filtering
+ * Version:  1.0
+ *
+ * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
+ * Email koolsmoky(at)users.sourceforge.net
+ * Web   http://www.3dfxzone.it/koolsmoky
+ *
+ * this is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * this is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "Ext_TxFilter.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/* Progress callback passed to ext_ghq_init: every call backspaces over the
+ * previous glyph and prints the next one of "-\|/", then flushes stdout so
+ * the spinner is visible even though no newline is written.  `format` and
+ * the variadic arguments are ignored unless the disabled branch is enabled. */
+void DisplayLoadProgress(const wchar_t *format, ...)
+{
+#if 0
+  va_list args;
+  wchar_t wbuf[INFO_BUF];
+  char buf[INFO_BUF];
+
+  /* process input */
+  va_start(args, format);
+  vswprintf(wbuf, format, args);
+  va_end(args);
+
+  /* XXX: convert to multibyte */
+  wcstombs(buf, wbuf, INFO_BUF);
+  printf("%s", buf); /* never use converted text as the format string */
+#else
+  static const char frames[] = "-\\|/";
+  static unsigned int i = 0;
+  printf("\b%c", frames[i]);
+  i = (i + 1) % 4;
+  fflush(stdout); /* no newline is printed, so flush explicitly */
+#endif
+}
+
+/* Prints the banner and, given exactly one argument (the internal ROM name),
+ * runs ext_ghq_init then ext_ghq_shutdown; ext_ghq_init's result is returned. */
+int main(int argc, char* argv[])
+{
+  float dummy = 1.1; /* force the compiler to load floating point support */
+  boolean bret = 0;
+  int options = 0;
+
+  /* Plugin path; zero-filled so it stays a valid string if GETCWD fails */
+  wchar_t path[MAX_PATH] = L"";
+#ifdef WIN32
+  GETCWD(MAX_PATH, path);
+#else
+  char cbuf[MAX_PATH] = "";
+  GETCWD(MAX_PATH, cbuf);
+  mbstowcs(path, cbuf, MAX_PATH - 1);
+  path[MAX_PATH - 1] = L'\0'; /* mbstowcs adds no terminator when it fills the buffer */
+#endif
+
+  /* ROM name */
+  wchar_t name[21] = L"DEFAULT";
+
+  printf("------------------------------------------------------------------\n");
+  printf("  GlideHQ Hires Texture Checker version 1.2\n");
+  printf("  Copyright (C) 2010  Hiroshi Morii   All Rights Reserved\n");
+  printf("     email   : koolsmoky(at)users.sourceforge.net\n");
+  printf("     website : http://www.3dfxzone.it/koolsmoky\n");
+  printf("\n");
+  printf("  Glide64 official website : http://glide64.emuxhaven.net\n");
+  printf("\n");
+  printf("  Usage: ghqchk.exe \"INTERNAL ROM NAME\"\n");
+  printf("------------------------------------------------------------------\n");
+
+  if (argc != 2) return 0;
+
+  printf("Checking \"%s\"...  ", argv[1]);
+  mbstowcs(name, argv[1], 20);
+  name[20] = L'\0'; /* longer arguments are truncated to 20 characters */
+
+  //options |= COMPRESS_TEX;
+  //options |= COMPRESS_HIRESTEX;
+  //options |= S3TC_COMPRESSION;
+  //options |= TILE_HIRESTEX;
+  //options |= FORCE16BPP_TEX;
+  //options |= FORCE16BPP_HIRESTEX;
+  //options |= GZ_TEXCACHE;
+  options |= GZ_HIRESTEXCACHE;
+  //options |= (DUMP_TEXCACHE|DUMP_HIRESTEXCACHE);
+  options |= LET_TEXARTISTS_FLY;
+  //options |= DUMP_TEX;
+  options |= RICE_HIRESTEXTURES;
+
+  bret = ext_ghq_init(1024, // max texture width supported by hardware
+                      1024, // max texture height supported by hardware
+                      32, // max texture bpp supported by hardware
+                      options,
+                      0, // cache texture to system memory
+                      path, // plugin path
+                      name, // name of ROM. must be no longer than 256 characters
+                      DisplayLoadProgress);
+  ext_ghq_shutdown();
+  printf("\bDone!\nLogged to ghqchk.txt\n");
+  return bret;
+}