From 38dd7fba1632139ca997b4854474f37f69c92300 Mon Sep 17 00:00:00 2001
From: Paul Rashidi <paulrashidi@google.com>
Date: Mon, 11 Jul 2016 18:07:34 -0700
Subject: [PATCH] Initial source and xcode build.

---
 .gitignore                                    |   32 +
 AUTHORS                                       |   14 +-
 CMakeLists.txt                                |   29 +
 CONTRIBUTING.md                               |   94 +-
 EtcLib/CMakeLists.txt                         |   24 +
 EtcLib/Etc/Etc.cpp                            |   58 +
 EtcLib/Etc/Etc.h                              |   43 +
 EtcLib/Etc/EtcColor.h                         |   64 +
 EtcLib/Etc/EtcColorFloatRGBA.h                |  321 +
 EtcLib/Etc/EtcConfig.h                        |   67 +
 EtcLib/Etc/EtcImage.cpp                       |  685 ++
 EtcLib/Etc/EtcImage.h                         |  249 +
 EtcLib/Etc/EtcMath.cpp                        |   64 +
 EtcLib/Etc/EtcMath.h                          |   40 +
 EtcLib/EtcCodec/EtcBlock4x4.cpp               |  417 ++
 EtcLib/EtcCodec/EtcBlock4x4.h                 |  172 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp       |  246 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding.h         |  148 +
 EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h     |  315 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp  | 1280 ++++
 EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h    |  186 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp   |  429 ++
 EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h     |  122 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp  |  447 ++
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h    |   86 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp  | 1727 +++++
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h    |   96 +
 .../EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp   | 1819 +++++
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h  |  129 +
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp |  474 ++
 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h   |  121 +
 EtcLib/EtcCodec/EtcDifferentialTrys.cpp       |  173 +
 EtcLib/EtcCodec/EtcDifferentialTrys.h         |   97 +
 EtcLib/EtcCodec/EtcErrorMetric.h              |   51 +
 EtcLib/EtcCodec/EtcIndividualTrys.cpp         |   85 +
 EtcLib/EtcCodec/EtcIndividualTrys.h           |   95 +
 EtcLib/EtcCodec/EtcSortedBlockList.cpp        |  228 +
 EtcLib/EtcCodec/EtcSortedBlockList.h          |  124 +
 EtcTool/Args.txt                              |    7 +
 EtcTool/CMakeLists.txt                        |   28 +
 EtcTool/EtcAnalysis.cpp                       |  410 ++
 EtcTool/EtcAnalysis.h                         |   71 +
 EtcTool/EtcComparison.cpp                     |  637 ++
 EtcTool/EtcComparison.h                       |   61 +
 EtcTool/EtcFile.cpp                           |  311 +
 EtcTool/EtcFile.h                             |   99 +
 EtcTool/EtcFileHeader.cpp                     |  192 +
 EtcTool/EtcFileHeader.h                       |  146 +
 EtcTool/EtcMemTest.cpp                        |  258 +
 EtcTool/EtcMemTest.h                          |   61 +
 EtcTool/EtcSourceImage.cpp                    |  292 +
 EtcTool/EtcSourceImage.h                      |   93 +
 EtcTool/EtcTool.cpp                           |  719 ++
 EtcTool/EtcTool.h                             |   36 +
 LICENSE                                       |  404 +-
 README.md                                     |  189 +-
 third_party/lodepng/LICENSE                   |   25 +
 third_party/lodepng/lodepng.cpp               | 6168 +++++++++++++++++
 third_party/lodepng/lodepng.h                 | 1756 +++++
 59 files changed, 22549 insertions(+), 265 deletions(-)
 create mode 100755 .gitignore
 create mode 100644 CMakeLists.txt
 create mode 100644 EtcLib/CMakeLists.txt
 create mode 100644 EtcLib/Etc/Etc.cpp
 create mode 100644 EtcLib/Etc/Etc.h
 create mode 100644 EtcLib/Etc/EtcColor.h
 create mode 100644 EtcLib/Etc/EtcColorFloatRGBA.h
 create mode 100644 EtcLib/Etc/EtcConfig.h
 create mode 100644 EtcLib/Etc/EtcImage.cpp
 create mode 100644 EtcLib/Etc/EtcImage.h
 create mode 100644 EtcLib/Etc/EtcMath.cpp
 create mode 100644 EtcLib/Etc/EtcMath.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp
 create mode 100644 EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h
 create mode 100644 EtcLib/EtcCodec/EtcDifferentialTrys.cpp
 create mode 100644 EtcLib/EtcCodec/EtcDifferentialTrys.h
 create mode 100644 EtcLib/EtcCodec/EtcErrorMetric.h
 create mode 100644 EtcLib/EtcCodec/EtcIndividualTrys.cpp
 create mode 100644 EtcLib/EtcCodec/EtcIndividualTrys.h
 create mode 100644 EtcLib/EtcCodec/EtcSortedBlockList.cpp
 create mode 100644 EtcLib/EtcCodec/EtcSortedBlockList.h
 create mode 100644 EtcTool/Args.txt
 create mode 100644 EtcTool/CMakeLists.txt
 create mode 100644 EtcTool/EtcAnalysis.cpp
 create mode 100644 EtcTool/EtcAnalysis.h
 create mode 100644 EtcTool/EtcComparison.cpp
 create mode 100644 EtcTool/EtcComparison.h
 create mode 100644 EtcTool/EtcFile.cpp
 create mode 100644 EtcTool/EtcFile.h
 create mode 100644 EtcTool/EtcFileHeader.cpp
 create mode 100644 EtcTool/EtcFileHeader.h
 create mode 100644 EtcTool/EtcMemTest.cpp
 create mode 100644 EtcTool/EtcMemTest.h
 create mode 100644 EtcTool/EtcSourceImage.cpp
 create mode 100644 EtcTool/EtcSourceImage.h
 create mode 100644 EtcTool/EtcTool.cpp
 create mode 100644 EtcTool/EtcTool.h
 create mode 100644 third_party/lodepng/LICENSE
 create mode 100644 third_party/lodepng/lodepng.cpp
 create mode 100644 third_party/lodepng/lodepng.h

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..4100db6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,32 @@
+# Copyright 2015 Etc2Comp Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+x64/
+Release/
+Debug/
+ipch/
+*.sdf
+.vs/
+*Debug/
+*Release/
+*debug*/
+*release*/
+*.db
+Analysis/
+CMakeFiles/
+*.vcxproj*
+*.sln
+*.cmake
+CMakeCache.txt
+build_*/*
+!build_*/gen_xcode.sh
diff --git a/AUTHORS b/AUTHORS
index e78a7f4..32daca2 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,7 +1,7 @@
-# This is the list of Etc2Comp authors for copyright purposes.
-#
-# This does not necessarily list everyone who has contributed code, since in
-# some cases, their employer may be the copyright holder.  To see the full list
-# of contributors, see the revision history in source control.
-Google Inc.
-Blue Shift Inc.
+# This is the list of Etc2Comp authors for copyright purposes.
+#
+# This does not necessarily list everyone who has contributed code, since in
+# some cases, their employer may be the copyright holder.  To see the full list
+# of contributors, see the revision history in source control.
+Google Inc.
+Blue Shift Inc.
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..6a6392b
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2015 Etc2Comp Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 2.8.9)
+project(EtcTest)
+
+set (CMAKE_CXX_STANDARD 11)
+IF (APPLE)	# no hard-coded header dirs: each toolchain locates its own C++ headers
+	set (CMAKE_CXX_FLAGS "-std=c++11 -g3 -Wall -O3")
+ELSE ()
+	IF (WIN32)
+		set (CMAKE_CXX_FLAGS "-W4 /EHsc")
+	ELSE()
+		set (CMAKE_CXX_FLAGS "-std=c++11 -pthread -g3 -Wall -O2")
+	ENDIF()
+ENDIF ()
+ADD_SUBDIRECTORY(EtcLib)
+ADD_SUBDIRECTORY(EtcTool)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 273adea..02979b1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,48 +1,48 @@
-# Contributing | Etc2Comp
-
-Thank you for contributing to the Etc2Comp community!
-
- - [Have a usage question?](#question)
- - [Think you found a bug?](#issue)
- - [Have a feature request?](#feature)
- - [Want to submit a pull request?](#submit)
- - [Small print](#smallprint)
-
-## <a name="question"></a> Have a usage question?
-
- - Review the README.md to make sure you're building the binary correctly.
- - Execute the binary with -h to show the usage help.
- - Search through [old issues](https://github.com/google/etc2comp/issues)
- for an answer to your question.
- - If you still haven't found an answer to your question, [open a new issue](https://github.com/google/etc2comp/issues/new).
-Please use the provided bug report template and include a minimal repro.
-
-## <a name="issue"></a> Think you found a bug?
-
-The library is experimental so that's highly likely. Follow the same
-procedure above for questions. If you are up to the challenge,
-[submit a Pull Request](#submit) with a fix!
-
-## <a name="feature"></a> Have a feature request?
-
-Great! Make sure the feature request isn't already listed in 
-[existing issues](https://github.com/google/etc2comp/issues),
-then go ahead and [open a new issue](https://github.com/google/etc2comp/issues/new).
-Remove the default template information and specify what you are requesting
-technically, as well as, specifying what use cases it supports.
-
-## <a name="submit"></a> Want to submit a pull request?
-
-Sweet, we'd love to accept your contribution! [Open a new pull request](https://github.com/google/etc2comp/compare).
-
-If you want to implement a new feature, please open an issue with a
-proposal first to discuss the change.
-
-You will need to sign our [Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
-before we can accept your pull request.
-
-## <a name="smallprint"></a> The small print
-Contributions made by corporations are covered by a different
-agreement than the one above, the
-[Software Grant and Corporate Contributor License Agreement]
+# Contributing | Etc2Comp
+
+Thank you for contributing to the Etc2Comp community!
+
+ - [Have a usage question?](#question)
+ - [Think you found a bug?](#issue)
+ - [Have a feature request?](#feature)
+ - [Want to submit a pull request?](#submit)
+ - [Small print](#smallprint)
+
+## <a name="question"></a> Have a usage question?
+
+ - Review the README.md to make sure you're building the binary correctly.
+ - Execute the binary with -h to show the usage help.
+ - Search through [old issues](https://github.com/google/etc2comp/issues)
+ for an answer to your question.
+ - If you still haven't found an answer to your question, [open a new issue](https://github.com/google/etc2comp/issues/new).
+Please use the provided bug report template and include a minimal repro.
+
+## <a name="issue"></a> Think you found a bug?
+
+The library is experimental so that's highly likely. Follow the same
+procedure above for questions. If you are up to the challenge,
+[submit a Pull Request](#submit) with a fix!
+
+## <a name="feature"></a> Have a feature request?
+
+Great! Make sure the feature request isn't already listed in 
+[existing issues](https://github.com/google/etc2comp/issues),
+then go ahead and [open a new issue](https://github.com/google/etc2comp/issues/new).
+Remove the default template information and specify what you are requesting
+technically, as well as the use cases it supports.
+
+## <a name="submit"></a> Want to submit a pull request?
+
+Sweet, we'd love to accept your contribution! [Open a new pull request](https://github.com/google/etc2comp/compare).
+
+If you want to implement a new feature, please open an issue with a
+proposal first to discuss the change.
+
+You will need to sign our [Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
+before we can accept your pull request.
+
+## <a name="smallprint"></a> The small print
+Contributions made by corporations are covered by a different
+agreement than the one above, the
+[Software Grant and Corporate Contributor License Agreement]
 (https://cla.developers.google.com/about/google-corporate).
\ No newline at end of file
diff --git a/EtcLib/CMakeLists.txt b/EtcLib/CMakeLists.txt
new file mode 100644
index 0000000..b584b88
--- /dev/null
+++ b/EtcLib/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright 2015 The Etc2Comp Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+project(EtcLib)
+include_directories(./Etc)
+include_directories(./EtcCodec)
+
+file(GLOB SOURCES
+	${PROJECT_SOURCE_DIR}/Etc/*.h
+	${PROJECT_SOURCE_DIR}/EtcCodec/*.h
+	${PROJECT_SOURCE_DIR}/Etc/*.cpp
+	${PROJECT_SOURCE_DIR}/EtcCodec/*.cpp)
+ADD_LIBRARY(EtcLib ${SOURCES})
diff --git a/EtcLib/Etc/Etc.cpp b/EtcLib/Etc/Etc.cpp
new file mode 100644
index 0000000..87e1d9b
--- /dev/null
+++ b/EtcLib/Etc/Etc.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EtcConfig.h"
+#include "Etc.h"
+
+#include <string.h>
+
+namespace Etc
+{
+	// ----------------------------------------------------------------------------------------------------
+	// C-style interface to the encoder: builds a temporary Image from the source
+	// pixels, encodes it, and reports the results through the output pointers
+	void Encode(float *a_pafSourceRGBA,
+				unsigned int a_uiSourceWidth,
+				unsigned int a_uiSourceHeight,
+				Image::Format a_format,
+				ErrorMetric a_eErrMetric,
+				float a_fEffort,
+				unsigned int a_uiJobs,
+				unsigned int a_uiMaxJobs,
+				unsigned char **a_ppaucEncodingBits,
+				unsigned int *a_puiEncodingBitsBytes,
+				unsigned int *a_puiExtendedWidth,
+				unsigned int *a_puiExtendedHeight,
+				int *a_piEncodingTime_ms, bool a_bVerboseOutput)
+	{
+		// the Image object lives only for the duration of this call
+		Image sourceImage(a_pafSourceRGBA, a_uiSourceWidth, a_uiSourceHeight, a_eErrMetric);
+		sourceImage.m_bVerboseOutput = a_bVerboseOutput;
+		sourceImage.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs);
+
+		// NOTE(review): the bits pointer is read from the local Image; confirm
+		// the buffer outlives ~Image() before callers rely on it
+		*a_ppaucEncodingBits = sourceImage.GetEncodingBits();
+		*a_puiEncodingBitsBytes = sourceImage.GetEncodingBitsBytes();
+		*a_puiExtendedWidth = sourceImage.GetExtendedWidth();
+		*a_puiExtendedHeight = sourceImage.GetExtendedHeight();
+		*a_piEncodingTime_ms = sourceImage.GetEncodingTimeMs();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}
diff --git a/EtcLib/Etc/Etc.h b/EtcLib/Etc/Etc.h
new file mode 100644
index 0000000..6fa6d5c
--- /dev/null
+++ b/EtcLib/Etc/Etc.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcConfig.h"
+#include "EtcImage.h"
+#include "EtcColor.h"
+#include "EtcErrorMetric.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits;
+
+	// C-style interface to the encoder.
+	// Encodes a_uiSourceWidth x a_uiSourceHeight float RGBA source pixels to
+	// a_format and reports the results through the output pointers: the
+	// encoding bits and their size in bytes, the extended width and height,
+	// and the encoding time in milliseconds.
+	void Encode(float *a_pafSourceRGBA,
+				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+				Image::Format a_format, ErrorMetric a_eErrMetric,
+				float a_fEffort,
+				unsigned int a_uiJobs, unsigned int a_uiMaxJobs,
+				unsigned char **a_ppaucEncodingBits,
+				unsigned int *a_puiEncodingBitsBytes,
+				unsigned int *a_puiExtendedWidth, unsigned int *a_puiExtendedHeight,
+				int *a_piEncodingTime_ms, bool a_bVerboseOutput = false);
+
+}
diff --git a/EtcLib/Etc/EtcColor.h b/EtcLib/Etc/EtcColor.h
new file mode 100644
index 0000000..0bd86e8
--- /dev/null
+++ b/EtcLib/Etc/EtcColor.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <math.h>
+
+namespace Etc
+{
+
+	// decodes one channel using the sRGB transfer-function constants
+	inline float LogToLinear(float a_fLog)
+	{
+		static const float OFFSET = 0.055f;
+		static const float SCALE = 1.0f + OFFSET;
+
+		// values at or below the threshold sit on the straight-line segment
+		if (a_fLog <= 0.04045f)
+		{
+			return a_fLog / 12.92f;
+		}
+
+		return powf((a_fLog + OFFSET) / SCALE, 2.4f);
+	}
+
+	// encodes one channel using the sRGB transfer-function constants; the input
+	// is taken by value so const values and temporaries can be passed as well
+	inline float LinearToLog(float a_fLinear)
+	{
+		static const float ALPHA = 0.055f;
+		static const float ONE_PLUS_ALPHA = 1.0f + ALPHA;
+
+		if (a_fLinear <= 0.0031308f)
+		{
+			return 12.92f * a_fLinear;
+		}
+
+		return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA;
+	}
+
+	// one pixel as four unsigned bytes, declared in R, G, B, A order
+	class ColorR8G8B8A8
+	{
+	public:
+		// red, green and blue channels
+		unsigned char ucR, ucG, ucB;
+
+		// alpha channel
+		unsigned char ucA;
+	};
+}
diff --git a/EtcLib/Etc/EtcColorFloatRGBA.h b/EtcLib/Etc/EtcColorFloatRGBA.h
new file mode 100644
index 0000000..5afd6ef
--- /dev/null
+++ b/EtcLib/Etc/EtcColorFloatRGBA.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcConfig.h"
+#include "EtcColor.h"
+
+#include <math.h>
+
+namespace Etc
+{
+
+	// RGBA color stored as four floats: fR, fG, fB, fA
+	class ColorFloatRGBA
+    {
+    public:
+		ColorFloatRGBA(void)
+        {
+            fR = fG = fB = fA = 0.0f;
+        }
+
+		ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA)
+        {
+            fR = a_fR;
+            fG = a_fG;
+            fB = a_fB;
+            fA = a_fA;
+        }
+
+		// channel-wise sum, alpha included; the const reference and const
+		// qualifier let const objects and temporaries be used as operands
+		inline ColorFloatRGBA operator+(const ColorFloatRGBA& a_rfrgba) const
+		{
+			return ColorFloatRGBA(fR + a_rfrgba.fR,
+									fG + a_rfrgba.fG,
+									fB + a_rfrgba.fB,
+									fA + a_rfrgba.fA);
+		}
+
+		// adds a_f to red, green and blue; alpha is copied through unchanged
+		inline ColorFloatRGBA operator+(float a_f) const
+		{
+			ColorFloatRGBA frgba(*this);
+			frgba.fR += a_f;
+			frgba.fG += a_f;
+			frgba.fB += a_f;
+			return frgba;
+		}
+
+		// subtracts a_f from red, green and blue; alpha is copied through unchanged
+		inline ColorFloatRGBA operator-(float a_f) const
+		{
+			ColorFloatRGBA frgba(*this);
+			frgba.fR -= a_f;
+			frgba.fG -= a_f;
+			frgba.fB -= a_f;
+			return frgba;
+		}
+
+		// channel-wise difference, alpha included; const-qualified like
+		// operator+ so const objects and temporaries can be used as operands
+		inline ColorFloatRGBA operator-(const ColorFloatRGBA& a_rfrgba) const
+		{
+			return ColorFloatRGBA(fR - a_rfrgba.fR,
+									fG - a_rfrgba.fG,
+									fB - a_rfrgba.fB,
+									fA - a_rfrgba.fA);
+		}
+
+		// multiplies red, green and blue by a_f; alpha is copied through unchanged
+		inline ColorFloatRGBA operator*(float a_f) const
+		{
+			ColorFloatRGBA frgba(*this);
+			frgba.fR *= a_f;
+			frgba.fG *= a_f;
+			frgba.fB *= a_f;
+			return frgba;
+		}
+
+		// scales red, green and blue by a_f; alpha is copied through unchanged
+		inline ColorFloatRGBA ScaleRGB(float a_f) const
+		{
+			ColorFloatRGBA frgba(*this);
+			frgba.fR = a_f * fR;
+			frgba.fG = a_f * fG;
+			frgba.fB = a_f * fB;
+
+			return frgba;
+		}
+
+		// rounds red, green and blue to the nearest whole number
+		// NOTE(review): alpha is not carried over -- the result's fA is 0.0f,
+		// unlike ScaleRGB()/ClampRGB(), so QuantizeR4G4B4() and
+		// QuantizeR5G5B5() return alpha 0; confirm this is intended
+		inline ColorFloatRGBA RoundRGB(void) const
+		{
+			return ColorFloatRGBA(roundf(fR), roundf(fG), roundf(fB),
+									0.0f);	// alpha is neither rounded nor copied
+		}
+
+		// applies LogToLinear() to red, green and blue; alpha is copied through
+		inline ColorFloatRGBA ToLinear() const
+		{
+			ColorFloatRGBA frgbaLinear(*this);
+			frgbaLinear.fR = LogToLinear(fR);
+			frgbaLinear.fG = LogToLinear(fG);
+			frgbaLinear.fB = LogToLinear(fB);
+
+			return frgbaLinear;
+		}
+
+		// applies LinearToLog() to red, green and blue; alpha is copied through
+		inline ColorFloatRGBA ToLog(void) const
+		{
+			// convert the copy's own channels so LinearToLog() gets mutable lvalues
+			ColorFloatRGBA frgbaLog(*this);
+			frgbaLog.fR = LinearToLog(frgbaLog.fR);
+			frgbaLog.fG = LinearToLog(frgbaLog.fG);
+			frgbaLog.fB = LinearToLog(frgbaLog.fB);
+			return frgbaLog;
+		}
+
+		// converts four 8-bit channel values to floats by dividing each by 255
+		inline static ColorFloatRGBA ConvertFromRGBA8(unsigned char a_ucR,
+			unsigned char a_ucG, unsigned char a_ucB, unsigned char a_ucA)
+		{
+			ColorFloatRGBA frgba;
+			frgba.fR = (float)a_ucR / 255.0f;
+			frgba.fG = (float)a_ucG / 255.0f;
+			frgba.fB = (float)a_ucB / 255.0f;
+			frgba.fA = (float)a_ucA / 255.0f;
+
+			return frgba;
+		}
+
+		// widens 4-bit channels to 8 bits (value repeated in both nibbles), divides by 255; alpha = 1
+		inline static ColorFloatRGBA ConvertFromRGB4(unsigned char a_ucR4,
+			unsigned char a_ucG4,
+			unsigned char a_ucB4)
+		{
+			unsigned char ucR8 = (unsigned char)((a_ucR4 << 4) + a_ucR4);
+			unsigned char ucG8 = (unsigned char)((a_ucG4 << 4) + a_ucG4);
+			unsigned char ucB8 = (unsigned char)((a_ucB4 << 4) + a_ucB4);
+
+			ColorFloatRGBA frgba;
+			frgba.fR = (float)ucR8 / 255.0f;
+			frgba.fG = (float)ucG8 / 255.0f;
+			frgba.fB = (float)ucB8 / 255.0f;
+			frgba.fA = 1.0f;
+
+			return frgba;
+		}
+
+		// widens 5-bit channels to 8 bits (<< 3, low bits filled from >> 2), divides by 255; alpha = 1
+		inline static ColorFloatRGBA ConvertFromRGB5(unsigned char a_ucR5,
+			unsigned char a_ucG5,
+			unsigned char a_ucB5)
+		{
+			unsigned char ucR8 = (unsigned char)((a_ucR5 << 3) + (a_ucR5 >> 2));
+			unsigned char ucG8 = (unsigned char)((a_ucG5 << 3) + (a_ucG5 >> 2));
+			unsigned char ucB8 = (unsigned char)((a_ucB5 << 3) + (a_ucB5 >> 2));
+
+			ColorFloatRGBA frgba;
+			frgba.fR = (float)ucR8 / 255.0f;
+			frgba.fG = (float)ucG8 / 255.0f;
+			frgba.fB = (float)ucB8 / 255.0f;
+			frgba.fA = 1.0f;
+
+			return frgba;
+		}
+
+		// widens 6/7/6-bit channels to 8 bits the same way, divides by 255; alpha = 1
+		inline static ColorFloatRGBA ConvertFromR6G7B6(unsigned char a_ucR6,
+			unsigned char a_ucG7,
+			unsigned char a_ucB6)
+		{
+			unsigned char ucR8 = (unsigned char)((a_ucR6 << 2) + (a_ucR6 >> 4));
+			unsigned char ucG8 = (unsigned char)((a_ucG7 << 1) + (a_ucG7 >> 6));
+			unsigned char ucB8 = (unsigned char)((a_ucB6 << 2) + (a_ucB6 >> 4));
+
+			ColorFloatRGBA frgba;
+			frgba.fR = (float)ucR8 / 255.0f;
+			frgba.fG = (float)ucG8 / 255.0f;
+			frgba.fB = (float)ucB8 / 255.0f;
+			frgba.fA = 1.0f;
+
+			return frgba;
+		}
+
+		// quantize to 4 bits, expand to 8 bits
+		inline ColorFloatRGBA QuantizeR4G4B4(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+
+			// quantize to 4 bits
+			frgba = frgba.ClampRGB().ScaleRGB(15.0f).RoundRGB();
+			unsigned int uiR4 = (unsigned int)frgba.fR;
+			unsigned int uiG4 = (unsigned int)frgba.fG;
+			unsigned int uiB4 = (unsigned int)frgba.fB;
+
+			// expand to 8 bits
+			frgba.fR = (float) ((uiR4 << 4) + uiR4);
+			frgba.fG = (float) ((uiG4 << 4) + uiG4);
+			frgba.fB = (float) ((uiB4 << 4) + uiB4);
+
+			frgba = frgba.ScaleRGB(1.0f/255.0f);
+
+			return frgba;
+		}
+
+		// quantize to 5 bits, expand to 8 bits
+		inline ColorFloatRGBA QuantizeR5G5B5(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+
+			// quantize to 5 bits
+			frgba = frgba.ClampRGB().ScaleRGB(31.0f).RoundRGB();
+			unsigned int uiR5 = (unsigned int)frgba.fR;
+			unsigned int uiG5 = (unsigned int)frgba.fG;
+			unsigned int uiB5 = (unsigned int)frgba.fB;
+
+			// expand to 8 bits
+			frgba.fR = (float)((uiR5 << 3) + (uiR5 >> 2));
+			frgba.fG = (float)((uiG5 << 3) + (uiG5 >> 2));
+			frgba.fB = (float)((uiB5 << 3) + (uiB5 >> 2));
+
+			frgba = frgba.ScaleRGB(1.0f / 255.0f);
+
+			return frgba;
+		}
+
+		// quantize to 6/7/6 bits, expand to 8 bits
+		inline ColorFloatRGBA QuantizeR6G7B6(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+
+			// quantize to 6/7/6 bits
+			ColorFloatRGBA frgba6 = frgba.ClampRGB().ScaleRGB(63.0f).RoundRGB();
+			ColorFloatRGBA frgba7 = frgba.ClampRGB().ScaleRGB(127.0f).RoundRGB();
+			unsigned int uiR6 = (unsigned int)frgba6.fR;
+			unsigned int uiG7 = (unsigned int)frgba7.fG;
+			unsigned int uiB6 = (unsigned int)frgba6.fB;
+
+			// expand to 8 bits
+			frgba.fR = (float)((uiR6 << 2) + (uiR6 >> 4));
+			frgba.fG = (float)((uiG7 << 1) + (uiG7 >> 6));
+			frgba.fB = (float)((uiB6 << 2) + (uiB6 >> 4));
+
+			frgba = frgba.ScaleRGB(1.0f / 255.0f);
+
+			return frgba;
+		}
+
+		// clamps red, green and blue to [0, 1]; alpha is copied through unchanged
+		inline ColorFloatRGBA ClampRGB(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
+			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
+			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
+			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
+			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
+			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
+			return frgba;
+		}
+
+		// clamps all four channels, alpha included, to [0, 1]
+		inline ColorFloatRGBA ClampRGBA(void) const
+		{
+			ColorFloatRGBA frgba = *this;
+			if (frgba.fR < 0.0f) { frgba.fR = 0.0f; }
+			if (frgba.fR > 1.0f) { frgba.fR = 1.0f; }
+			if (frgba.fG < 0.0f) { frgba.fG = 0.0f; }
+			if (frgba.fG > 1.0f) { frgba.fG = 1.0f; }
+			if (frgba.fB < 0.0f) { frgba.fB = 0.0f; }
+			if (frgba.fB > 1.0f) { frgba.fB = 1.0f; }
+			if (frgba.fA < 0.0f) { frgba.fA = 0.0f; }
+			if (frgba.fA > 1.0f) { frgba.fA = 1.0f; }
+			return frgba;
+		}
+
+		// IntRed/IntGreen/IntBlue/IntAlpha: one channel times a_fScale, rounded to the nearest int
+		inline int IntRed(float a_fScale) const
+		{
+			return (int)roundf(fR * a_fScale);
+		}
+
+		inline int IntGreen(float a_fScale) const
+		{
+			return (int)roundf(fG * a_fScale);
+		}
+
+		inline int IntBlue(float a_fScale) const
+		{
+			return (int)roundf(fB * a_fScale);
+		}
+
+		inline int IntAlpha(float a_fScale) const
+		{
+			return (int)roundf(fA * a_fScale);
+		}
+
+		float	fR, fG, fB, fA;
+    };
+
+}
+
diff --git a/EtcLib/Etc/EtcConfig.h b/EtcLib/Etc/EtcConfig.h
new file mode 100644
index 0000000..6ce1b8c
--- /dev/null
+++ b/EtcLib/Etc/EtcConfig.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#ifdef _WIN32
+#define ETC_WINDOWS (1)
+#else
+#define ETC_WINDOWS (0)
+#endif
+
+#if __APPLE__
+#define ETC_OSX (1)
+#else
+#define ETC_OSX (0)
+#endif
+
+#if __unix__
+#define ETC_UNIX (1)
+#else
+#define ETC_UNIX (0)
+#endif
+
+
+// short names for common types
+#include <stdint.h>
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+typedef float	f32;
+typedef double	f64;
+
+// Keep asserts enabled in release builds during development
+#undef NDEBUG
+
+// 0=disable. stb_image can be used if you need to compress
+// other image formats like jpg
+#define USE_STB_IMAGE_LOAD 0	
+
+#if ETC_WINDOWS
+#include <SDKDDKVer.h>
+#define _CRT_SECURE_NO_WARNINGS (1)
+#include <tchar.h>
+#endif
+
+#include <stdio.h>
+
diff --git a/EtcLib/Etc/EtcImage.cpp b/EtcLib/Etc/EtcImage.cpp
new file mode 100644
index 0000000..94a0e7b
--- /dev/null
+++ b/EtcLib/Etc/EtcImage.cpp
@@ -0,0 +1,685 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcImage.cpp
+
+Image is an array of 4x4 blocks that represent the encoding of the source image
+
+*/
+
+#include "EtcConfig.h"
+
+#include <stdlib.h>
+
+#include "EtcImage.h"
+
+#include "Etc.h"
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcSortedBlockList.h"
+
+#if ETC_WINDOWS
+#include <windows.h>
+#endif
+#include <ctime>
+#include <chrono>
+#include <future>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+// fix conflict with Block4x4::AlphaMix
+// OPAQUE and TRANSPARENT may already be defined as macros at this point
+// (presumably by <windows.h>, included above on Windows builds -- TODO confirm);
+// remove them so that identifiers with those names are not macro-expanded
+#ifdef OPAQUE
+#undef OPAQUE
+#endif
+#ifdef TRANSPARENT
+#undef TRANSPARENT
+#endif
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Image::Image(void)
+	{
+		// default state: no source image, no blocks, no encoding bits
+		// every member is given a defined value so that accessors such as
+		// GetNumberOfBlocks() never read an indeterminate value
+		m_encodingStatus = EncodingStatus::SUCCESS;
+		m_warningsToCapture = EncodingStatus::SUCCESS;
+
+		m_pafrgbaSource = nullptr;
+		m_uiSourceWidth = 0;
+		m_uiSourceHeight = 0;
+		m_uiExtendedWidth = 0;
+		m_uiExtendedHeight = 0;
+		m_uiBlockColumns = 0;
+		m_uiBlockRows = 0;
+
+		m_pablock = nullptr;
+		m_psortedblocklist = nullptr;
+
+		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
+		m_uiEncodingBitsBytes = 0;
+		m_paucEncodingBits = nullptr;
+
+		m_format = Format::UNKNOWN;
+		// same default error metric that Block4x4's default constructor uses
+		m_errormetric = ErrorMetric::NUMERIC;
+		m_fEffort = 0.0f;
+
+		// -1 = not encoded yet (same value the other constructors use)
+		m_iEncodeTime_ms = -1;
+
+		m_iNumOpaquePixels = 0;
+		m_iNumTranslucentPixels = 0;
+		m_iNumTransparentPixels = 0;
+		m_bVerboseOutput = false;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// constructor using source image
+	// used to set state before Encode() is called
+	//
+	// a_pafSourceRGBA is reinterpreted as an array of ColorFloatRGBA pixels; the pointer is
+	// stored, not copied, and this class never frees it
+	// a_uiSourceWidth and a_uiSourceHeight are in pixels; the block grid covers both dimensions
+	// rounded up to a multiple of 4
+	// a_errormetric is stored here and replaced by the one passed to Encode()
+	//
+	Image::Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
+					unsigned int a_uiSourceHeight, 
+					ErrorMetric a_errormetric)
+	{
+		m_encodingStatus = EncodingStatus::SUCCESS;
+		m_warningsToCapture = EncodingStatus::SUCCESS;
+		m_pafrgbaSource = (ColorFloatRGBA *) a_pafSourceRGBA;
+		m_uiSourceWidth = a_uiSourceWidth;
+		m_uiSourceHeight = a_uiSourceHeight;
+
+		// round up to a multiple of 4 in full unsigned int precision
+		// (narrowing to unsigned short first would silently wrap large dimensions,
+		// e.g. 65535 would round up to 65536 and wrap to 0)
+		m_uiExtendedWidth = (m_uiSourceWidth + 3u) & ~3u;
+		m_uiExtendedHeight = (m_uiSourceHeight + 3u) & ~3u;
+
+		m_uiBlockColumns = m_uiExtendedWidth >> 2;
+		m_uiBlockRows = m_uiExtendedHeight >> 2;
+
+		m_pablock = new Block4x4[GetNumberOfBlocks()];
+		assert(m_pablock);
+
+		// created by InitBlocksAndBlockSorter() during Encode()
+		m_psortedblocklist = nullptr;
+
+		m_format = Format::UNKNOWN;
+
+		m_encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN;
+		m_uiEncodingBitsBytes = 0;
+		m_paucEncodingBits = nullptr;
+
+		m_errormetric = a_errormetric;
+		m_fEffort = 0.0f;
+
+		// -1 until Encode() has measured a real duration
+		m_iEncodeTime_ms = -1;
+
+		m_iNumOpaquePixels = 0;
+		m_iNumTranslucentPixels = 0;
+		m_iNumTransparentPixels = 0;
+		m_bVerboseOutput = false;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// constructor using encoding bits
+	// recreates encoding state using a previously encoded image
+	//
+	// a_paucEncidingBits is stored as-is (not copied) and is not freed by the destructor
+	// a_pimageSource and a_errormetric are handed to each block's InitFromEtcEncodingBits()
+	// an unrecognized a_format sets ERROR_UNKNOWN_FORMAT and leaves the blocks default-constructed
+	//
+	// NOTE(review): a_uiEncodingBitsBytes is stored but never checked against
+	// (number of blocks * bytes per block); the caller must supply a large enough buffer
+	//
+	Image::Image(Format a_format,
+					unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+					unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
+					Image *a_pimageSource, ErrorMetric a_errormetric)
+	{
+		m_encodingStatus = EncodingStatus::SUCCESS;
+		m_warningsToCapture = EncodingStatus::SUCCESS;
+		m_pafrgbaSource = nullptr;
+		m_uiSourceWidth = a_uiSourceWidth;
+		m_uiSourceHeight = a_uiSourceHeight;
+
+		// round up to a multiple of 4 in full unsigned int precision
+		// (narrowing to unsigned short first would silently wrap large dimensions)
+		m_uiExtendedWidth = (m_uiSourceWidth + 3u) & ~3u;
+		m_uiExtendedHeight = (m_uiSourceHeight + 3u) & ~3u;
+
+		m_uiBlockColumns = m_uiExtendedWidth >> 2;
+		m_uiBlockRows = m_uiExtendedHeight >> 2;
+
+		unsigned int uiBlocks = GetNumberOfBlocks();
+
+		m_pablock = new Block4x4[uiBlocks];
+		assert(m_pablock);
+		m_psortedblocklist = nullptr;
+
+		m_format = a_format;
+
+		m_iNumOpaquePixels = 0;
+		m_iNumTranslucentPixels = 0;
+		m_iNumTransparentPixels = 0;
+
+		// give every remaining member a defined value before the format check,
+		// so that the early return below cannot leave any of them indeterminate
+		m_uiEncodingBitsBytes = 0;
+		m_paucEncodingBits = nullptr;
+		m_errormetric = a_errormetric;
+		m_fEffort = 0.0f;
+		m_bVerboseOutput = false;
+		m_iEncodeTime_ms = -1;
+
+		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
+		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
+		{
+			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
+			return;
+		}
+		m_uiEncodingBitsBytes = a_uiEncodingBitsBytes;
+		m_paucEncodingBits = a_paucEncidingBits;
+
+		unsigned char *paucEncodingBits = m_paucEncodingBits;
+		unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
+
+		// walk the blocks in row-major order; (uiH, uiV) is the pixel position of
+		// the current block's first pixel, and the encoding bits pointer advances
+		// by one block's worth of bytes each step
+		unsigned int uiH = 0;
+		unsigned int uiV = 0;
+		for (unsigned int uiBlock = 0; uiBlock < uiBlocks; uiBlock++)
+		{
+			m_pablock[uiBlock].InitFromEtcEncodingBits(a_format, uiH, uiV, paucEncodingBits, 
+														a_pimageSource, a_errormetric);
+			paucEncodingBits += uiEncodingBitsBytesPerBlock;
+			uiH += 4;
+			// uiH is always a multiple of 4, so reaching the source width also means
+			// reaching the extended width: start the next row of blocks
+			if (uiH >= m_uiSourceWidth)
+			{
+				uiH = 0;
+				uiV += 4;
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Image::~Image(void)
+	{
+		// release the block array (delete[] on a null pointer is a no-op)
+		delete[] m_pablock;
+		m_pablock = nullptr;
+
+		// m_paucEncodingBits is deliberately left alone: the code that used to
+		// delete[] it here was disabled, and the constructor that takes encoding
+		// bits only stores the caller's pointer
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// encode an image
+	// create a set of encoding bits that conforms to a_format
+	// find best fit using a_errormetric
+	// explore a range of possible encodings based on a_fEffort (range = [0:100])
+	// speed up process using a_uiJobs as the number of process threads (a_uiJobs must not exceed a_uiMaxJobs)
+	//
+	// out-of-range effort and job counts are clamped and reported as WARNING_* bits
+	// returns the accumulated status; if an ERROR_* bit is set nothing has been encoded
+	// on success the encoding bits are available through GetEncodingBits()
+	// (they are allocated here with new[] and are not freed by the destructor)
+	//
+	Image::EncodingStatus Image::Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, unsigned int a_uiJobs, unsigned int a_uiMaxJobs)
+	{
+
+		const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
+
+		m_encodingStatus = EncodingStatus::SUCCESS;
+
+		m_format = a_format;
+		m_errormetric = a_errormetric;
+		m_fEffort = a_fEffort;
+
+		// NOTE(review): ERROR_METRICS looks like a count sentinel, in which case this
+		// test should probably be >= ; left unchanged until confirmed against EtcErrorMetric.h
+		if (m_errormetric < 0 || m_errormetric > ERROR_METRICS)
+		{
+			AddToEncodingStatus(ERROR_UNKNOWN_ERROR_METRIC);
+			return m_encodingStatus;
+		}
+
+		if (m_fEffort < 0.0f)
+		{
+			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
+			m_fEffort = 0.0f;
+		}
+		else if (m_fEffort > 100.0f)
+		{
+			AddToEncodingStatus(WARNING_EFFORT_OUT_OF_RANGE);
+			m_fEffort = 100.0f;
+		}
+
+		// clamp to the caller's limit first, then make sure at least one job is left
+		// (a_uiMaxJobs may itself be 0, and the calling thread always does one share of the work)
+		if (a_uiJobs > a_uiMaxJobs)
+		{
+			a_uiJobs = a_uiMaxJobs;
+			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
+		}
+		if (a_uiJobs < 1)
+		{
+			a_uiJobs = 1;
+			AddToEncodingStatus(WARNING_JOBS_OUT_OF_RANGE);
+		}
+
+		m_encodingbitsformat = DetermineEncodingBitsFormat(m_format);
+
+		if (m_encodingbitsformat == Block4x4EncodingBits::Format::UNKNOWN)
+		{
+			AddToEncodingStatus(ERROR_UNKNOWN_FORMAT);
+			return m_encodingStatus;
+		}
+
+		// an image with a zero width or height has no blocks; report it instead of
+		// starting zero worker threads (which would trip the stride assert in RunFirstPass)
+		const unsigned int uiBlocks = GetNumberOfBlocks();
+		if (uiBlocks == 0)
+		{
+			AddToEncodingStatus(ERROR_ZERO_WIDTH_OR_HEIGHT);
+			return m_encodingStatus;
+		}
+
+		assert(m_paucEncodingBits == nullptr);
+		m_uiEncodingBitsBytes = uiBlocks * Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
+		m_paucEncodingBits = new unsigned char[m_uiEncodingBitsBytes];
+
+		InitBlocksAndBlockSorter();
+
+		// one future per helper thread; at most (a_uiJobs - 1) of them are ever used
+		// because the calling thread always processes the last slice itself
+		std::future<void> *handle = new std::future<void>[a_uiJobs];
+
+		unsigned int uiUnfinishedBlocks = uiBlocks;
+		unsigned int uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
+
+		// first pass: thread i handles blocks i, i + stride, i + 2*stride, ...
+		for (unsigned int uiThread = 0; uiThread + 1 < uiNumThreadsNeeded; uiThread++)
+		{
+			handle[uiThread] = std::async(std::launch::async, &Image::RunFirstPass, this, uiThread, uiNumThreadsNeeded);
+		}
+
+		RunFirstPass(uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
+
+		for (unsigned int uiThread = 0; uiThread + 1 < uiNumThreadsNeeded; uiThread++)
+		{
+			handle[uiThread].get();
+		}
+
+		// perform effort-based encoding
+		if (m_fEffort > 0.0f)
+		{
+			unsigned int uiFinishedBlocks = 0;
+			unsigned int uiTotalEffortBlocks = static_cast<unsigned int>(roundf(0.01f * m_fEffort  * uiBlocks));
+
+			// float rounding must never ask for more blocks than exist,
+			// otherwise the loop below could not reach its exit condition
+			if (uiTotalEffortBlocks > uiBlocks)
+			{
+				uiTotalEffortBlocks = uiBlocks;
+			}
+
+			if (m_bVerboseOutput)
+			{
+				printf("effortblocks = %u\n", uiTotalEffortBlocks);
+			}
+			unsigned int uiPass = 0;
+			while (1)
+			{
+				if (m_bVerboseOutput)
+				{
+					uiPass++;
+					printf("pass %u\n", uiPass);
+				}
+				m_psortedblocklist->Sort();
+				uiUnfinishedBlocks = m_psortedblocklist->GetNumberOfSortedBlocks();
+				uiFinishedBlocks = uiBlocks - uiUnfinishedBlocks;
+				if (m_bVerboseOutput)
+				{
+					printf("    %u unfinished blocks\n", uiUnfinishedBlocks);
+					// m_psortedblocklist->Print();
+				}
+
+				// stop encoding when we did enough to satisfy the effort percentage
+				if (uiFinishedBlocks >= uiTotalEffortBlocks)
+				{
+					if (m_bVerboseOutput)
+					{
+						printf("Finished %u Blocks out of %u\n", uiFinishedBlocks, uiTotalEffortBlocks);
+					}
+					break;
+				}
+
+				unsigned int uiIteratedBlocks = 0;
+				unsigned int blocksToIterateThisPass = (uiTotalEffortBlocks - uiFinishedBlocks);
+				uiNumThreadsNeeded = (uiUnfinishedBlocks < a_uiJobs) ? uiUnfinishedBlocks : a_uiJobs;
+
+				if (uiNumThreadsNeeded <= 1)
+				{
+					// a single thread is enough: do the pass on the calling thread
+					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, 0, 1);
+				}
+				else
+				{
+					// we have a lot of work to do, so let's multi-thread it
+					std::future<unsigned int> *handleToBlockEncoders = new std::future<unsigned int>[uiNumThreadsNeeded-1];
+
+					for (unsigned int uiThread = 0; uiThread + 1 < uiNumThreadsNeeded; uiThread++)
+					{
+						handleToBlockEncoders[uiThread] = std::async(std::launch::async, &Image::IterateThroughWorstBlocks, this, blocksToIterateThisPass, uiThread, uiNumThreadsNeeded);
+					}
+					uiIteratedBlocks = IterateThroughWorstBlocks(blocksToIterateThisPass, uiNumThreadsNeeded - 1, uiNumThreadsNeeded);
+
+					for (unsigned int uiThread = 0; uiThread + 1 < uiNumThreadsNeeded; uiThread++)
+					{
+						uiIteratedBlocks += handleToBlockEncoders[uiThread].get();
+					}
+
+					delete[] handleToBlockEncoders;
+				}
+
+				if (m_bVerboseOutput)
+				{
+					printf("    %u iterated blocks\n", uiIteratedBlocks);
+				}
+			}
+		}
+
+		// generate Etc2-compatible bit-format 4x4 blocks
+		for (unsigned int uiThread = 0; uiThread + 1 < a_uiJobs; uiThread++)
+		{
+			handle[uiThread] = std::async(std::launch::async, &Image::SetEncodingBits, this, uiThread, a_uiJobs);
+		}
+		SetEncodingBits(a_uiJobs - 1, a_uiJobs);
+
+		for (unsigned int uiThread = 0; uiThread + 1 < a_uiJobs; uiThread++)
+		{
+			handle[uiThread].get();
+		}
+
+		const std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
+		const std::chrono::milliseconds elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+		m_iEncodeTime_ms = (int)elapsed.count();
+
+		delete[] handle;
+
+		// the sorter is only needed while encoding; clear the pointer so it does not dangle
+		delete m_psortedblocklist;
+		m_psortedblocklist = nullptr;
+
+		return m_encodingStatus;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// iterate the encoding thru the blocks with the worst error
+	// only blocks whose position in the sorted list is below a_uiMaxBlocks are iterated
+	// split the blocks between the process threads using a_uiMultithreadingOffset and a_uiMultithreadingStride:
+	// this call handles list positions offset, offset + stride, offset + 2*stride, ...
+	//
+	// returns the number of blocks this call actually iterated, so that the caller
+	// can add up the results of all threads to get the total for the pass
+	//
+	unsigned int Image::IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks, 
+													unsigned int a_uiMultithreadingOffset, 
+													unsigned int a_uiMultithreadingStride)
+	{
+		assert(a_uiMultithreadingStride > 0);
+
+		SortedBlockList::Link *plink = m_psortedblocklist->GetLinkToFirstBlock();
+		if (plink == nullptr)
+		{
+			// nothing to iterate (presumably an empty sorted list -- TODO confirm
+			// against SortedBlockList); avoid calling Advance() through a null link
+			return 0;
+		}
+
+		// position in the sorted list of the block about to be processed
+		unsigned int uiListPosition = a_uiMultithreadingOffset;
+		// number of blocks processed by this call
+		unsigned int uiIteratedBlocks = 0;
+
+		for (plink = plink->Advance(a_uiMultithreadingOffset);
+				plink != nullptr;
+				plink = plink->Advance(a_uiMultithreadingStride) )
+		{
+			if (uiListPosition >= a_uiMaxBlocks)
+			{
+				break;
+			}
+
+			plink->GetBlock()->PerformEncodingIteration(m_fEffort);
+
+			uiIteratedBlocks++;
+			uiListPosition += a_uiMultithreadingStride;
+		}
+
+		return uiIteratedBlocks;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// choose the set of warnings that Encode() should report for the current format (m_format)
+	// the chosen warnings are merged into m_warningsToCapture via TrackEncodingWarning()
+	//
+	void Image::FindEncodingWarningTypesForCurFormat()
+	{
+		// tracked for every format
+		unsigned int uiWarnings = (unsigned int)WARNING_ALL_TRANSPARENT_PIXELS |
+									(unsigned int)WARNING_SOME_RGBA_NOT_0_TO_1;
+
+		// format-specific additions
+		switch (m_format)
+		{
+		case Image::Format::ETC1:
+		case Image::Format::RGB8:
+		case Image::Format::SRGB8:
+			uiWarnings |= (unsigned int)WARNING_SOME_NON_OPAQUE_PIXELS;
+			uiWarnings |= (unsigned int)WARNING_SOME_TRANSLUCENT_PIXELS;
+			break;
+
+		case Image::Format::RGB8A1:
+		case Image::Format::SRGB8A1:
+			uiWarnings |= (unsigned int)WARNING_SOME_TRANSLUCENT_PIXELS;
+			uiWarnings |= (unsigned int)WARNING_ALL_OPAQUE_PIXELS;
+			break;
+
+		case Image::Format::RGBA8:
+		case Image::Format::SRGBA8:
+			uiWarnings |= (unsigned int)WARNING_ALL_OPAQUE_PIXELS;
+			break;
+
+		case Image::Format::R11:
+		case Image::Format::SIGNED_R11:
+			uiWarnings |= (unsigned int)WARNING_SOME_NON_OPAQUE_PIXELS;
+			uiWarnings |= (unsigned int)WARNING_SOME_TRANSLUCENT_PIXELS;
+			uiWarnings |= (unsigned int)WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO;
+			uiWarnings |= (unsigned int)WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO;
+			break;
+
+		case Image::Format::RG11:
+		case Image::Format::SIGNED_RG11:
+			uiWarnings |= (unsigned int)WARNING_SOME_NON_OPAQUE_PIXELS;
+			uiWarnings |= (unsigned int)WARNING_SOME_TRANSLUCENT_PIXELS;
+			uiWarnings |= (unsigned int)WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO;
+			break;
+
+		case Image::Format::FORMATS:
+		case Image::Format::UNKNOWN:
+		default:
+			// not an encodable format
+			assert(0);
+			break;
+		}
+
+		TrackEncodingWarning((EncodingStatus)uiWarnings);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// turn the pixel statistics gathered on this image into warnings
+	// each warning goes through AddToEncodingStatusIfSignfigant(), so it is only
+	// reported if it is one of the warnings being tracked for the current format
+	//
+	void Image::FindAndSetEncodingWarnings()
+	{
+		// pixel count of the whole block grid (4x4 pixels per block)
+		// NOTE(review): this is the extended size, not the source size -- confirm
+		// how the per-pixel counters treat pixels outside the source image
+		const int iGridPixels = (m_uiBlockRows * 4) * (m_uiBlockColumns * 4);
+
+		// opacity
+		if (m_iNumOpaquePixels == iGridPixels)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_ALL_OPAQUE_PIXELS);
+		}
+		else if (m_iNumOpaquePixels < iGridPixels)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_NON_OPAQUE_PIXELS);
+		}
+
+		if (m_iNumTranslucentPixels > 0)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_TRANSLUCENT_PIXELS);
+		}
+
+		if (m_iNumTransparentPixels == iGridPixels)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_ALL_TRANSPARENT_PIXELS);
+		}
+
+		// color channels that some formats expect to be zero
+		if (m_numColorValues.fB > 0.0f)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO);
+		}
+		if (m_numColorValues.fG > 0.0f)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO);
+		}
+
+		// a positive out-of-range count on any of the four channels raises the same warning
+		const bool bAnyOutOfRange = m_numOutOfRangeValues.fR > 0.0f ||
+									m_numOutOfRangeValues.fG > 0.0f ||
+									m_numOutOfRangeValues.fB > 0.0f ||
+									m_numOutOfRangeValues.fA > 0.0f;
+		if (bAnyOutOfRange)
+		{
+			AddToEncodingStatusIfSignfigant(WARNING_SOME_RGBA_NOT_0_TO_1);
+		}
+	}
+	
+	// ----------------------------------------------------------------------------------------------------
+	// map an image format to its display name
+	// anything that is not an encodable format (including FORMATS and UNKNOWN) maps to "UNKNOWN"
+	//
+	const char * Image::EncodingFormatToString(Image::Format a_format)
+	{
+		const char *pszName = "UNKNOWN";
+
+		switch (a_format)
+		{
+		case Image::Format::ETC1:			pszName = "ETC1";			break;
+		case Image::Format::RGB8:			pszName = "RGB8";			break;
+		case Image::Format::SRGB8:			pszName = "SRGB8";			break;
+
+		case Image::Format::RGB8A1:			pszName = "RGB8A1";			break;
+		case Image::Format::SRGB8A1:		pszName = "SRGB8A1";		break;
+		case Image::Format::RGBA8:			pszName = "RGBA8";			break;
+		case Image::Format::SRGBA8:			pszName = "SRGBA8";			break;
+
+		case Image::Format::R11:			pszName = "R11";			break;
+		case Image::Format::SIGNED_R11:		pszName = "SIGNED_R11";		break;
+
+		case Image::Format::RG11:			pszName = "RG11";			break;
+		case Image::Format::SIGNED_RG11:	pszName = "SIGNED_RG11";	break;
+
+		default:
+			// FORMATS, UNKNOWN and any out-of-range value keep the "UNKNOWN" name
+			break;
+		}
+
+		return pszName;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// display name of this image's own format (m_format)
+	//
+	const char * Image::EncodingFormatToString(void)
+	{
+		const Image::Format format = m_format;
+		return Image::EncodingFormatToString(format);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// prepare for encoding:
+	//   1. choose which warnings to track for the current format
+	//   2. init every block from the source image and give it its slice of the encoding bits
+	//   3. convert the gathered pixel statistics into warnings
+	//   4. create the block sorter and add every block to it
+	//
+	void Image::InitBlocksAndBlockSorter(void)
+	{
+		FindEncodingWarningTypesForCurFormat();
+
+		// every block occupies the same number of bytes in the encoding bits
+		const unsigned int uiBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat);
+
+		// blocks are laid out row by row; block (column, row) starts at pixel (column*4, row*4)
+		unsigned int uiBlock = 0;
+		for (unsigned int uiRow = 0; uiRow < m_uiBlockRows; uiRow++)
+		{
+			for (unsigned int uiColumn = 0; uiColumn < m_uiBlockColumns; uiColumn++, uiBlock++)
+			{
+				m_pablock[uiBlock].InitFromSource(this, uiColumn * 4, uiRow * 4,
+													m_paucEncodingBits + uiBlock * uiBytesPerBlock,
+													m_errormetric);
+			}
+		}
+
+		FindAndSetEncodingWarnings();
+
+		// block sorter: constructed with the block count and 100 as arguments
+		// (see SortedBlockList for the meaning of the second argument), then filled in array order
+		m_psortedblocklist = new SortedBlockList(GetNumberOfBlocks(), 100);
+
+		Block4x4 * const pblockEnd = m_pablock + GetNumberOfBlocks();
+		for (Block4x4 *pblock = m_pablock; pblock != pblockEnd; pblock++)
+		{
+			m_psortedblocklist->AddBlock(pblock);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// first encoding pass: perform one encoding iteration on each block of this thread's share
+	// the share is blocks a_uiMultithreadingOffset, + a_uiMultithreadingStride, + 2 * stride, ...
+	// Encode() runs this over all blocks regardless of effort
+	//
+	void Image::RunFirstPass(unsigned int a_uiMultithreadingOffset, unsigned int a_uiMultithreadingStride)
+	{
+		assert(a_uiMultithreadingStride > 0);
+
+		const unsigned int uiBlocks = GetNumberOfBlocks();
+		unsigned int uiBlock = a_uiMultithreadingOffset;
+		while (uiBlock < uiBlocks)
+		{
+			m_pablock[uiBlock].PerformEncodingIteration(m_fEffort);
+			uiBlock += a_uiMultithreadingStride;
+		}
+	}
+
+    // ----------------------------------------------------------------------------------------------------
+	// write out the encoding bits (for the output file) of each block in this thread's share
+	// the share is blocks a_uiMultithreadingOffset, + a_uiMultithreadingStride, + 2 * stride, ...
+	//
+	void Image::SetEncodingBits(unsigned int a_uiMultithreadingOffset,
+								unsigned int a_uiMultithreadingStride)
+	{
+		assert(a_uiMultithreadingStride > 0);
+
+		const unsigned int uiBlocks = GetNumberOfBlocks();
+		unsigned int uiBlock = a_uiMultithreadingOffset;
+		while (uiBlock < uiBlocks)
+		{
+			m_pablock[uiBlock].SetEncodingBitsFromEncoding();
+			uiBlock += a_uiMultithreadingStride;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// total error of the image: the block errors added up in block order
+	//
+	float Image::GetError(void)
+	{
+		float fTotalError = 0.0f;
+
+		Block4x4 * const pblockEnd = m_pablock + GetNumberOfBlocks();
+		for (Block4x4 *pblock = m_pablock; pblock != pblockEnd; pblock++)
+		{
+			fTotalError += pblock->GetError();
+		}
+
+		return fTotalError;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// map an image format to the encoding bits format it is stored in
+	// several image formats share one encoding bits format
+	// formats without a mapping give Block4x4EncodingBits::Format::UNKNOWN
+	//
+	Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format)
+	{
+		switch (a_format)
+		{
+		case Format::ETC1:
+		case Format::RGB8:
+		case Format::SRGB8:
+			return Block4x4EncodingBits::Format::RGB8;
+
+		case Format::RGBA8:
+		case Format::SRGBA8:
+			return Block4x4EncodingBits::Format::RGBA8;
+
+		case Format::R11:
+		case Format::SIGNED_R11:
+			return Block4x4EncodingBits::Format::R11;
+
+		case Format::RG11:
+		case Format::SIGNED_RG11:
+			return Block4x4EncodingBits::Format::RG11;
+
+		case Format::RGB8A1:
+		case Format::SRGB8A1:
+			return Block4x4EncodingBits::Format::RGB8A1;
+
+		default:
+			break;
+		}
+
+		return Block4x4EncodingBits::Format::UNKNOWN;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}	// namespace Etc
diff --git a/EtcLib/Etc/EtcImage.h b/EtcLib/Etc/EtcImage.h
new file mode 100644
index 0000000..fb67131
--- /dev/null
+++ b/EtcLib/Etc/EtcImage.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+//#include "Etc.h"
+#include "EtcColorFloatRGBA.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcErrorMetric.h"
+
+
+namespace Etc
+{
+	class Block4x4;
+	class EncoderSpec;
+	class SortedBlockList;
+
+    // An image being encoded to, or reconstructed from, ETC 4x4 block encodings.
+    // The image owns its array of Block4x4 (freed in the destructor).
+    // It does not free the source pixels or the encoding bits.
+    class Image
+    {
+    public:
+
+		// the different warnings and errors that can come up during encoding
+		// values are bit flags that are OR'ed together into one status word
+		enum  EncodingStatus
+		{
+			SUCCESS = 0,
+			//
+			WARNING_THRESHOLD = 1 << 0,
+			//
+			WARNING_EFFORT_OUT_OF_RANGE = 1 << 1,
+			WARNING_JOBS_OUT_OF_RANGE = 1 << 2,
+			WARNING_SOME_NON_OPAQUE_PIXELS = 1 << 3,//just for opaque formats, etc1, rgb8, r11, rg11
+			WARNING_ALL_OPAQUE_PIXELS = 1 << 4,
+			WARNING_ALL_TRANSPARENT_PIXELS = 1 << 5,
+			WARNING_SOME_TRANSLUCENT_PIXELS = 1 << 6,//just for rgb8A1
+			WARNING_SOME_RGBA_NOT_0_TO_1 = 1 << 7,
+			WARNING_SOME_BLUE_VALUES_ARE_NOT_ZERO = 1 << 8,
+			WARNING_SOME_GREEN_VALUES_ARE_NOT_ZERO = 1 << 9,
+			//
+			ERROR_THRESHOLD = 1 << 16,
+			//
+			ERROR_UNKNOWN_FORMAT = 1 << 17,
+			ERROR_UNKNOWN_ERROR_METRIC = 1 << 18,
+			ERROR_ZERO_WIDTH_OR_HEIGHT = 1 << 19,
+			//
+		};
+		
+		// the encoding formats; FORMATS marks the end of the list of real formats
+		enum class Format
+		{
+			UNKNOWN,
+			//
+			ETC1,
+			//
+			// ETC2 formats
+			RGB8,
+			SRGB8,
+			RGBA8,
+			SRGBA8,
+			R11,
+			SIGNED_R11,
+			RG11,
+			SIGNED_RG11,
+			RGB8A1,
+			SRGB8A1,
+			//
+			FORMATS,
+			//
+			DEFAULT = SRGB8
+		};
+
+		// constructor using source image
+		Image(float *a_pafSourceRGBA, unsigned int a_uiSourceWidth,
+				unsigned int a_uiSourceHeight,
+				ErrorMetric a_errormetric);
+
+		// constructor using encoding bits
+		Image(Format a_format, 
+				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+				unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes,
+				Image *a_pimageSource,
+				ErrorMetric a_errormetric);
+
+		~Image(void);
+
+		// not copyable: the destructor frees the block array, so a member-wise
+		// copy would free the same array twice
+		Image(const Image &) = delete;
+		Image & operator=(const Image &) = delete;
+
+		// encode the source image; returns the accumulated warnings and errors
+		EncodingStatus Encode(Format a_format, ErrorMetric a_errormetric, float a_fEffort, 
+			unsigned int a_uiJobs, unsigned int a_uiMaxJobs);
+
+		// OR a warning or error bit into the encoding status
+		inline void AddToEncodingStatus(EncodingStatus a_encStatus)
+		{
+			m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus);
+		}
+		
+		inline unsigned int GetSourceWidth(void)
+		{
+			return m_uiSourceWidth;
+		}
+
+		inline unsigned int GetSourceHeight(void)
+		{
+			return m_uiSourceHeight;
+		}
+
+		inline unsigned int GetExtendedWidth(void)
+		{
+			return m_uiExtendedWidth;
+		}
+
+		inline unsigned int GetExtendedHeight(void)
+		{
+			return m_uiExtendedHeight;
+		}
+
+		// number of 4x4 blocks in the block grid
+		inline unsigned int GetNumberOfBlocks()
+		{
+			return m_uiBlockColumns * m_uiBlockRows;
+		}
+
+		inline Block4x4 * GetBlocks()
+		{
+			return m_pablock;
+		}
+
+		inline unsigned char * GetEncodingBits(void)
+		{
+			return m_paucEncodingBits;
+		}
+
+		inline unsigned int GetEncodingBitsBytes(void)
+		{
+			return m_uiEncodingBitsBytes;
+		}
+
+		inline int GetEncodingTimeMs(void)
+		{
+			return m_iEncodeTime_ms;
+		}
+
+		float GetError(void);
+
+		// pointer to the source pixel at column a_uiH, row a_uiV
+		// returns nullptr if the position lies outside the source image
+		inline ColorFloatRGBA * GetSourcePixel(unsigned int a_uiH, unsigned int a_uiV)
+		{
+			if (a_uiH >= m_uiSourceWidth || a_uiV >= m_uiSourceHeight)
+			{
+				return nullptr;
+			}
+
+			return &m_pafrgbaSource[a_uiV*m_uiSourceWidth + a_uiH];
+		}
+
+		inline Format GetFormat(void)
+		{
+			return m_format;
+		}
+
+		static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format);
+
+		// round a dimension up to the next multiple of 4
+		// the argument and the result are both unsigned short, so values that
+		// do not fit in an unsigned short wrap around
+		inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension)
+		{
+			return (unsigned short)((a_ushOriginalDimension + 3) & ~3);
+		}
+
+		inline ErrorMetric GetErrorMetric(void)
+		{
+			return m_errormetric;
+		}
+
+		static const char * EncodingFormatToString(Image::Format a_format);
+		const char * EncodingFormatToString(void);
+		//used to get basic information about the image data
+		int m_iNumOpaquePixels;
+		int m_iNumTranslucentPixels;
+		int m_iNumTransparentPixels;
+
+		ColorFloatRGBA m_numColorValues;
+		ColorFloatRGBA m_numOutOfRangeValues;
+
+		bool m_bVerboseOutput;
+	private:
+		//add a warning or error to check for while encoding
+		inline void TrackEncodingWarning(EncodingStatus a_encStatus)
+		{
+			m_warningsToCapture = (EncodingStatus)((unsigned int)m_warningsToCapture | (unsigned int)a_encStatus);
+		}
+
+		//report the warning if it is something we care about for this encoding
+		//(all bits of a_encStatus must be present in m_warningsToCapture)
+		inline void AddToEncodingStatusIfSignfigant(EncodingStatus a_encStatus)
+		{
+			if ((EncodingStatus)((unsigned int)m_warningsToCapture & (unsigned int)a_encStatus) == a_encStatus)
+			{
+				AddToEncodingStatus(a_encStatus);
+			}
+		}
+
+		Image(void);
+		void FindEncodingWarningTypesForCurFormat();
+		void FindAndSetEncodingWarnings();
+
+		void InitBlocksAndBlockSorter(void);
+
+		void RunFirstPass(unsigned int a_uiMultithreadingOffset, 
+							unsigned int a_uiMultithreadingStride);
+
+		void SetEncodingBits(unsigned int a_uiMultithreadingOffset,
+								unsigned int a_uiMultithreadingStride);
+
+		unsigned int IterateThroughWorstBlocks(unsigned int a_uiMaxBlocks,
+												unsigned int a_uiMultithreadingOffset,
+												unsigned int a_uiMultithreadingStride);
+
+		// inputs
+		ColorFloatRGBA *m_pafrgbaSource;
+		unsigned int m_uiSourceWidth;
+		unsigned int m_uiSourceHeight;
+		unsigned int m_uiExtendedWidth;
+		unsigned int m_uiExtendedHeight;
+		unsigned int m_uiBlockColumns;
+		unsigned int m_uiBlockRows;
+		// intermediate data
+		Block4x4 *m_pablock;
+		// encoding
+		Format m_format;
+		Block4x4EncodingBits::Format m_encodingbitsformat;
+		unsigned int m_uiEncodingBitsBytes;		// for entire image
+		unsigned char *m_paucEncodingBits;
+		ErrorMetric m_errormetric;
+		float m_fEffort;
+		// stats
+		int m_iEncodeTime_ms;
+		
+		SortedBlockList *m_psortedblocklist;
+		//this will hold any warning or errors that happen during encoding
+		EncodingStatus m_encodingStatus;
+		//these will be the warnings we are tracking
+		EncodingStatus m_warningsToCapture;
+	};
+
+} // namespace Etc
diff --git a/EtcLib/Etc/EtcMath.cpp b/EtcLib/Etc/EtcMath.cpp
new file mode 100644
index 0000000..cd70a9a
--- /dev/null
+++ b/EtcLib/Etc/EtcMath.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EtcConfig.h"
+#include "EtcMath.h"
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// least-squares fit of a line to the a_Points points (a_afX[i], a_afY[i])
+	// the fitted slope and offset are written to *a_fSlope and *a_fOffset
+	// returns true if the fit is degenerate (the divisor is zero, e.g. a vertical line);
+	// slope and offset are then both set to 0
+	//
+	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
+					float *a_fSlope, float *a_fOffset)
+	{
+		// running sums of x, y, x*y and x*x
+		float fSumX = 0.0f;
+		float fSumY = 0.0f;
+		float fSumXY = 0.0f;
+		float fSumX2 = 0.0f;
+
+		for (unsigned int ui = 0; ui < a_Points; ui++)
+		{
+			const float fX = a_afX[ui];
+			const float fY = a_afY[ui];
+
+			fSumX += fX;
+			fSumY += fY;
+			fSumXY += fX * fY;
+			fSumX2 += fX * fX;
+		}
+
+		const float fPoints = (float)a_Points;
+		const float fDivisor = fPoints*fSumX2 - fSumX*fSumX;
+
+		if (fDivisor != 0.0f)
+		{
+			const float fSlope = (fPoints*fSumXY - fSumX*fSumY) / fDivisor;
+
+			*a_fSlope = fSlope;
+			*a_fOffset = (fSumY - fSlope*fSumX) / fPoints;
+			return false;
+		}
+
+		// if vertical line
+		*a_fSlope = 0.0f;
+		*a_fOffset = 0.0f;
+		return true;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/Etc/EtcMath.h b/EtcLib/Etc/EtcMath.h
new file mode 100644
index 0000000..3d951fe
--- /dev/null
+++ b/EtcLib/Etc/EtcMath.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <math.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// least-squares fit of a line to the a_Points points (a_afX[i], a_afY[i]); defined in EtcMath.cpp
+	// on return *a_fSlope and *a_fOffset hold the fitted slope and offset
+	// return true if vertical line (slope and offset are then both set to 0)
+	bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
+					float *a_fSlope, float *a_fOffset);
+
+	// convert a mean squared error to a PSNR value: 10 * log10(1 / MSE)
+	// a mean squared error of exactly zero gives INFINITY
+	inline float ConvertMSEToPSNR(float a_fMSE)
+	{
+		return (a_fMSE != 0.0f) ? 10.0f * log10f(1.0f / a_fMSE) : INFINITY;
+	}
+
+
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4.cpp b/EtcLib/EtcCodec/EtcBlock4x4.cpp
new file mode 100644
index 0000000..64a420e
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4.cpp
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+EtcBlock4x4.cpp
+
+Implements the state associated with each 4x4 block of pixels in an image
+
+Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an 
+alpha of NAN
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcColor.h"
+#include "EtcImage.h"
+#include "EtcColorFloatRGBA.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcBlock4x4Encoding_RGBA8.h"
+#include "EtcBlock4x4Encoding_RGB8A1.h"
+#include "EtcBlock4x4Encoding_R11.h"
+#include "EtcBlock4x4Encoding_RG11.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+	// ETC pixels are scanned vertically.  
+	// this mapping is for when someone wants to scan the ETC pixels horizontally
+	const unsigned int Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4::Block4x4(void)
+		: m_pimageSource(nullptr),
+		m_uiSourceH(0),
+		m_uiSourceV(0),
+		m_errormetric(ErrorMetric::NUMERIC),
+		m_sourcealphamix(SourceAlphaMix::UNKNOWN),
+		m_boolBorderPixels(false),
+		m_boolPunchThroughPixels(false),
+		m_pencoding(nullptr)
+	{
+		// start out as an empty block: no source image, no encoding and an
+		// alpha mix that has not been determined yet
+		//
+		// m_afrgbaSource is default constructed; SetSourcePixels() fills it in
+	}
+	Block4x4::~Block4x4()
+	{
+		// the source image is not deleted here, the pointer is just cleared
+		m_pimageSource = nullptr;
+
+		// the encoding is released by the block; deleting a null pointer is a no-op
+		delete m_pencoding;
+		m_pencoding = nullptr;
+	}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding from a source image
+	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
+	// a_paucEncodingBits is the place to store the final encoding
+	// a_errormetric is used for finding the best encoding
+	//
+	void Block4x4::InitFromSource(Image *a_pimageSource,
+									unsigned int a_uiSourceH, unsigned int a_uiSourceV,
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric)
+	{
+		// reset the per-block state; the old "Block4x4();" only built a discarded temporary
+		m_boolBorderPixels = false;
+		m_boolPunchThroughPixels = false;
+		delete m_pencoding;
+		m_pencoding = nullptr;
+
+		m_pimageSource = a_pimageSource;
+		m_uiSourceH = a_uiSourceH;
+		m_uiSourceV = a_uiSourceV;
+		m_errormetric = a_errormetric;
+		SetSourcePixels();
+
+		// set block encoder function
+		switch (m_pimageSource->GetFormat())
+		{
+		case Image::Format::ETC1:
+			m_pencoding = new Block4x4Encoding_ETC1;
+			break;
+
+		case Image::Format::RGB8:
+		case Image::Format::SRGB8:
+			m_pencoding = new Block4x4Encoding_RGB8;
+			break;
+
+		case Image::Format::RGBA8:
+		case Image::Format::SRGBA8:
+			switch (m_sourcealphamix)
+			{
+			case SourceAlphaMix::OPAQUE:
+				m_pencoding = new Block4x4Encoding_RGBA8_Opaque;
+				break;
+
+			case SourceAlphaMix::TRANSPARENT:
+				m_pencoding = new Block4x4Encoding_RGBA8_Transparent;
+				break;
+
+			case SourceAlphaMix::TRANSLUCENT:
+				m_pencoding = new Block4x4Encoding_RGBA8;
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+			break;
+
+		case Image::Format::RGB8A1:
+		case Image::Format::SRGB8A1:
+			switch (m_sourcealphamix)
+			{
+			case SourceAlphaMix::OPAQUE:
+				m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
+				break;
+
+			case SourceAlphaMix::TRANSPARENT:
+				m_pencoding = new Block4x4Encoding_RGB8A1_Transparent;
+				break;
+
+			case SourceAlphaMix::TRANSLUCENT:
+				if (m_boolPunchThroughPixels)
+				{
+					m_pencoding = new Block4x4Encoding_RGB8A1;
+				}
+				else
+				{
+					m_pencoding = new Block4x4Encoding_RGB8A1_Opaque;
+				}
+				break;
+
+			default:
+				assert(0);
+				break;
+			}
+			break;
+
+		case Image::Format::R11:
+		case Image::Format::SIGNED_R11:
+			m_pencoding = new Block4x4Encoding_R11;
+			break;
+		case Image::Format::RG11:
+		case Image::Format::SIGNED_RG11:
+			m_pencoding = new Block4x4Encoding_RG11;
+			break;
+		default:
+			assert(0);
+			break;
+		}
+
+		m_pencoding->InitFromSource(this, m_afrgbaSource,
+									a_paucEncodingBits, a_errormetric);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization of encoding state from a prior encoding using encoding bits
+	// [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource
+	// a_paucEncodingBits is the place to read the prior encoding
+	// a_imageformat is used to determine how to interpret a_paucEncodingBits
+	// a_errormetric was used for the prior encoding
+	//
+	void Block4x4::InitFromEtcEncodingBits(Image::Format a_imageformat,
+											unsigned int a_uiSourceH, unsigned int a_uiSourceV,
+											unsigned char *a_paucEncodingBits,
+											Image *a_pimageSource,
+											ErrorMetric a_errormetric)
+	{
+		// reset the per-block state; the old "Block4x4();" only built a discarded temporary
+		m_boolBorderPixels = false;
+		m_boolPunchThroughPixels = false;
+		delete m_pencoding;
+		m_pencoding = nullptr;
+		m_pimageSource = a_pimageSource;
+		m_uiSourceH = a_uiSourceH;
+		m_uiSourceV = a_uiSourceV;
+		m_errormetric = a_errormetric;
+		SetSourcePixels();
+
+		// set block encoder function
+		switch (a_imageformat)
+		{
+		case Image::Format::ETC1:
+			m_pencoding = new Block4x4Encoding_ETC1;
+			break;
+
+		case Image::Format::RGB8:
+		case Image::Format::SRGB8:
+			m_pencoding = new Block4x4Encoding_RGB8;
+			break;
+
+		case Image::Format::RGBA8:
+		case Image::Format::SRGBA8:
+			m_pencoding = new Block4x4Encoding_RGBA8;
+			break;
+
+		case Image::Format::RGB8A1:
+		case Image::Format::SRGB8A1:
+			m_pencoding = new Block4x4Encoding_RGB8A1;
+			break;
+		case Image::Format::R11:
+		case Image::Format::SIGNED_R11:
+			m_pencoding = new Block4x4Encoding_R11;
+			break;
+		case Image::Format::RG11:
+		case Image::Format::SIGNED_RG11:
+			m_pencoding = new Block4x4Encoding_RG11;
+			break;
+		default:
+			assert(0);
+			break;
+		}
+
+		m_pencoding->InitFromEncodingBits(this, a_paucEncodingBits, m_afrgbaSource,
+										m_pimageSource->GetErrorMetric());	// NOTE(review): not a_errormetric, confirm intended
+	}
+	
+	// ----------------------------------------------------------------------------------------------------
+	// set source pixels from m_pimageSource
+	// set m_sourcealphamix, m_boolBorderPixels and m_boolPunchThroughPixels
+	//
+	void Block4x4::SetSourcePixels(void)
+	{
+
+		Image::Format imageformat = m_pimageSource->GetFormat();
+
+		// alpha census
+		unsigned int uiTransparentSourcePixels = 0;
+		unsigned int uiOpaqueSourcePixels = 0;
+
+		// copy source to consecutive memory locations
+		// convert from image horizontal scan to block vertical scan
+		unsigned int uiPixel = 0;
+		for (unsigned int uiBlockPixelH = 0; uiBlockPixelH < Block4x4::COLUMNS; uiBlockPixelH++)
+		{
+			unsigned int uiSourcePixelH = m_uiSourceH + uiBlockPixelH;
+
+			for (unsigned int uiBlockPixelV = 0; uiBlockPixelV < Block4x4::ROWS; uiBlockPixelV++)
+			{
+				unsigned int uiSourcePixelV = m_uiSourceV + uiBlockPixelV;
+
+				ColorFloatRGBA *pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV);
+
+				// if pixel extends beyond source image because of block padding
+				if (pfrgbaSource == nullptr)
+				{
+					m_afrgbaSource[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, NAN);	// denotes border pixel
+					m_boolBorderPixels = true;
+					uiTransparentSourcePixels++;
+				}
+				else
+				{
+					// get the current pixel data, and store some of the attributes
+					// before capping values to fit the encoder type
+					
+					m_afrgbaSource[uiPixel] = (*pfrgbaSource).ClampRGBA();
+
+					if (m_afrgbaSource[uiPixel].fA == 1.0f)
+					{
+						m_pimageSource->m_iNumOpaquePixels++;
+					}
+					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
+					{
+						m_pimageSource->m_iNumTransparentPixels++;
+					}
+					else if(m_afrgbaSource[uiPixel].fA > 0.0f && m_afrgbaSource[uiPixel].fA < 1.0f)
+					{
+						m_pimageSource->m_iNumTranslucentPixels++;
+					}
+					else
+					{
+						m_pimageSource->m_numOutOfRangeValues.fA++;
+					}
+
+					if (m_afrgbaSource[uiPixel].fR != 0.0f)
+					{
+						m_pimageSource->m_numColorValues.fR++;
+						//make sure we are getting a float between 0-1
+						if (m_afrgbaSource[uiPixel].fR - 1.0f > 0.0f)
+						{
+							m_pimageSource->m_numOutOfRangeValues.fR++;
+						}
+					}
+
+					if (m_afrgbaSource[uiPixel].fG != 0.0f)
+					{
+						m_pimageSource->m_numColorValues.fG++;
+						if (m_afrgbaSource[uiPixel].fG - 1.0f > 0.0f)
+						{
+							m_pimageSource->m_numOutOfRangeValues.fG++;
+						}
+					}
+					if (m_afrgbaSource[uiPixel].fB != 0.0f)
+					{
+						m_pimageSource->m_numColorValues.fB++;
+						if (m_afrgbaSource[uiPixel].fB - 1.0f > 0.0f)
+						{
+							m_pimageSource->m_numOutOfRangeValues.fB++;
+						}
+					}
+					// for formats with no alpha, set source alpha to 1
+					if (imageformat == Image::Format::ETC1 ||
+						imageformat == Image::Format::RGB8 ||
+						imageformat == Image::Format::SRGB8)
+					{
+						m_afrgbaSource[uiPixel].fA = 1.0f;
+					}
+
+					if (imageformat == Image::Format::R11 ||
+						imageformat == Image::Format::SIGNED_R11)
+					{
+						m_afrgbaSource[uiPixel].fA = 1.0f;
+						m_afrgbaSource[uiPixel].fG = 0.0f;
+						m_afrgbaSource[uiPixel].fB = 0.0f;
+					}
+
+					if (imageformat == Image::Format::RG11 ||
+						imageformat == Image::Format::SIGNED_RG11)
+					{
+						m_afrgbaSource[uiPixel].fA = 1.0f;
+						m_afrgbaSource[uiPixel].fB = 0.0f;
+					}
+
+				
+					// for RGB8A1, set source alpha to 0.0 or 1.0
+					// set punch through flag
+					if (imageformat == Image::Format::RGB8A1 ||
+						imageformat == Image::Format::SRGB8A1)
+					{
+						if (m_afrgbaSource[uiPixel].fA >= 0.5f)
+						{
+							m_afrgbaSource[uiPixel].fA = 1.0f;
+						}
+						else
+						{
+							m_afrgbaSource[uiPixel].fA = 0.0f;
+							m_boolPunchThroughPixels = true;
+						}
+					}
+
+					if (m_afrgbaSource[uiPixel].fA == 1.0f)
+					{
+						uiOpaqueSourcePixels++;
+					}
+					else if (m_afrgbaSource[uiPixel].fA == 0.0f)
+					{
+						uiTransparentSourcePixels++;
+					}
+
+				}
+
+				uiPixel += 1;
+			}
+		}
+
+		if (uiOpaqueSourcePixels == PIXELS)
+		{
+			m_sourcealphamix = SourceAlphaMix::OPAQUE;
+		}
+		else if (uiTransparentSourcePixels == PIXELS)
+		{
+			m_sourcealphamix = SourceAlphaMix::TRANSPARENT;
+		}
+		else
+		{
+			m_sourcealphamix = SourceAlphaMix::TRANSLUCENT;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// return a name for the encoding mode
+	//
+	const char * Block4x4::GetEncodingModeName(void)
+	{
+
+		const Block4x4Encoding::Mode mode = m_pencoding->GetMode();
+
+		// only four modes have a name here, every other mode
+		// (MODE_UNKNOWN, MODE_R11, MODE_RG11, ...) is reported as "???"
+		const char *pstrName = "???";
+
+		if (mode == Block4x4Encoding::MODE_ETC1) { pstrName = "ETC1"; }
+		else if (mode == Block4x4Encoding::MODE_T) { pstrName = "T"; }
+		else if (mode == Block4x4Encoding::MODE_H) { pstrName = "H"; }
+		else if (mode == Block4x4Encoding::MODE_PLANAR) { pstrName = "PLANAR"; }
+
+		return pstrName;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4.h b/EtcLib/EtcCodec/EtcBlock4x4.h
new file mode 100644
index 0000000..7716beb
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColor.h"
+#include "EtcColorFloatRGBA.h"
+#include "EtcErrorMetric.h"
+#include "EtcImage.h"
+#include "EtcBlock4x4Encoding.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits;
+
+	class Block4x4
+	{
+	public:
+
+		static const unsigned int ROWS = 4;
+		static const unsigned int COLUMNS = 4;
+		static const unsigned int PIXELS = ROWS * COLUMNS;
+
+		// the alpha mix for a 4x4 block of pixels
+		enum class SourceAlphaMix
+		{
+			UNKNOWN,
+			//
+			OPAQUE,			// all 1.0
+			TRANSPARENT,	// all 0.0 or NAN
+			TRANSLUCENT		// not all opaque or transparent
+		};
+
+		typedef void (Block4x4::*EncoderFunctionPtr)(void);
+
+		Block4x4(void);
+		~Block4x4();
+		void InitFromSource(Image *a_pimageSource,
+							unsigned int a_uiSourceH,
+							unsigned int a_uiSourceV,
+							unsigned char *a_paucEncodingBits,
+							ErrorMetric a_errormetric);
+
+		void InitFromEtcEncodingBits(Image::Format a_imageformat,
+										unsigned int a_uiSourceH,
+										unsigned int a_uiSourceV,
+										unsigned char *a_paucEncodingBits,
+										Image *a_pimageSource,
+										ErrorMetric a_errormetric);
+
+		// perform one iteration of the encoding at the given effort level
+		inline void PerformEncodingIteration(float a_fEffort)
+		{
+			m_pencoding->PerformIteration(a_fEffort);
+		}
+
+		inline void SetEncodingBitsFromEncoding(void)
+		{
+			m_pencoding->SetEncodingBits();
+		}
+
+		inline unsigned int GetSourceH(void)
+		{
+			return m_uiSourceH;
+		}
+
+		inline unsigned int GetSourceV(void)
+		{
+			return m_uiSourceV;
+		}
+
+		inline float GetError(void)
+		{
+			return m_pencoding->GetError();
+		}
+
+		static const unsigned int s_auiPixelOrderHScan[PIXELS];
+
+		inline ColorFloatRGBA * GetDecodedColors(void)
+		{
+			return m_pencoding->GetDecodedColors();
+		}
+
+		inline float * GetDecodedAlphas(void)
+		{
+			return m_pencoding->GetDecodedAlphas();
+		}
+
+		inline Block4x4Encoding::Mode GetEncodingMode(void)
+		{
+			return m_pencoding->GetMode();
+		}
+
+		inline bool GetFlip(void)
+		{
+			return m_pencoding->GetFlip();
+		}
+
+		inline bool IsDifferential(void)
+		{
+			return m_pencoding->IsDifferential();
+		}
+
+		inline ColorFloatRGBA * GetSource()
+		{
+			return m_afrgbaSource;
+		}
+
+		inline ErrorMetric GetErrorMetric()
+		{
+			return m_errormetric;
+		}
+
+		const char * GetEncodingModeName(void);
+
+		inline Block4x4Encoding * GetEncoding(void)
+		{
+			return m_pencoding;
+		}
+
+		inline SourceAlphaMix GetSourceAlphaMix(void)
+		{
+			return m_sourcealphamix;
+		}
+
+		inline Image * GetImageSource(void)
+		{
+			return m_pimageSource;
+		}
+
+		inline bool HasBorderPixels(void)
+		{
+			return m_boolBorderPixels;
+		}
+
+		inline bool HasPunchThroughPixels(void)
+		{
+			return m_boolPunchThroughPixels;
+		}
+
+	private:
+
+		void SetSourcePixels(void);
+
+		Image				*m_pimageSource;
+		unsigned int		m_uiSourceH;
+		unsigned int		m_uiSourceV;
+		ErrorMetric			m_errormetric;
+		ColorFloatRGBA		m_afrgbaSource[PIXELS];		// vertical scan
+
+		SourceAlphaMix		m_sourcealphamix;
+		bool				m_boolBorderPixels;			// border pixels are stored as rgba(0, 0, 0, NAN)
+		bool				m_boolPunchThroughPixels;	// RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5
+
+		Block4x4Encoding	*m_pencoding;
+
+	};
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp
new file mode 100644
index 0000000..0ccea2a
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding.cpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding.cpp
+
+Block4x4Encoding is the abstract base class for the different encoders.  Each encoder targets a 
+particular file format (e.g. ETC1, RGB8, RGBA8, R11)
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding::Block4x4Encoding(void)
+	{
+		// not attached to a block or to source pixels until Init() is called
+		m_pblockParent = nullptr;
+		m_pafrgbaSource = nullptr;
+		m_boolBorderPixels = false;
+
+		m_fError = -1.0f;	// -1 marks the error as undefined
+		m_mode = MODE_UNKNOWN;
+		m_uiEncodingIterations = 0;
+		m_boolDone = false;
+
+		// give the metric a defined value so that CalcPixelError() never reads an
+		// indeterminate enum; NUMERIC is also the default that Block4x4 starts with
+		m_errormetric = ErrorMetric::NUMERIC;
+
+		// -1 marks the decoded pixels as undefined
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(-1.0f, -1.0f, -1.0f, -1.0f);
+			m_afDecodedAlphas[uiPixel] = -1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialize the generic encoding for a 4x4 block
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// init the decoded pixels to -1 to mark them as undefined
+	// init the error to -1 to mark it as undefined
+	//
+	void Block4x4Encoding::Init(Block4x4 *a_pblockParent,
+								ColorFloatRGBA *a_pafrgbaSource,
+								ErrorMetric a_errormetric)
+	{
+		// remember the parent block, its source pixels and the metric to score with
+		m_pblockParent = a_pblockParent;
+		m_pafrgbaSource = a_pafrgbaSource;
+		m_errormetric = a_errormetric;
+
+		// the parent block already knows whether it contains border pixels
+		m_boolBorderPixels = a_pblockParent->HasBorderPixels();
+
+		// nothing has been encoded yet, so the error is still undefined (-1)
+		m_uiEncodingIterations = 0;
+		m_fError = -1.0f;
+
+		// mark every decoded color and alpha as undefined (-1)
+		const ColorFloatRGBA frgbaUndefined(-1.0f, -1.0f, -1.0f, -1.0f);
+		for (unsigned int uiIndex = 0; uiIndex != PIXELS; ++uiIndex)
+		{
+			m_afDecodedAlphas[uiIndex] = -1.0f;
+			m_afrgbaDecodedColors[uiIndex] = frgbaUndefined;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate the error for the block by summing the pixel errors
+	//
+	void Block4x4Encoding::CalcBlockError(void)
+	{
+		// start from zero and add the error of each decoded pixel against its source pixel
+		m_fError = 0.0f;
+		for (unsigned int uiIndex = 0; uiIndex != PIXELS; ++uiIndex)
+		{
+			const float fPixelError = CalcPixelError(m_afrgbaDecodedColors[uiIndex], m_afDecodedAlphas[uiIndex],
+														m_pafrgbaSource[uiIndex]);
+			m_fError += fPixelError;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate the error between the source pixel and the decoded pixel
+	// the error amount is based on the error metric
+	//
+	float Block4x4Encoding::CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
+											ColorFloatRGBA a_frgbaSourcePixel)
+	{
+		// returns the error of one decoded pixel against its source pixel, per m_errormetric
+		// border pixels (source alpha of NAN) never contribute any error
+		if (isnan(a_frgbaSourcePixel.fA))
+		{
+			return 0.0f;
+		}
+
+		if (m_errormetric == ErrorMetric::RGBA)
+		{
+			assert(a_fDecodedAlpha >= 0.0f);
+
+			float fDRed = (a_fDecodedAlpha * a_frgbaDecodedColor.fR) -
+							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fR);
+			float fDGreen = (a_fDecodedAlpha * a_frgbaDecodedColor.fG) -
+							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fG);
+			float fDBlue = (a_fDecodedAlpha * a_frgbaDecodedColor.fB) -
+							(a_frgbaSourcePixel.fA * a_frgbaSourcePixel.fB);
+
+			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
+
+			return fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue + fDAlpha*fDAlpha;
+		}
+		else if (m_errormetric == ErrorMetric::REC709)
+		{
+			assert(a_fDecodedAlpha >= 0.0f);
+
+			float fLuma1 = a_frgbaSourcePixel.fR*0.2126f + a_frgbaSourcePixel.fG*0.7152f + a_frgbaSourcePixel.fB*0.0722f;
+			float fChromaR1 = 0.5f * ((a_frgbaSourcePixel.fR - fLuma1) * (1.0f / (1.0f - 0.2126f)));
+			float fChromaB1 = 0.5f * ((a_frgbaSourcePixel.fB - fLuma1) * (1.0f / (1.0f - 0.0722f)));
+
+			float fLuma2 = a_frgbaDecodedColor.fR*0.2126f +
+							a_frgbaDecodedColor.fG*0.7152f +
+							a_frgbaDecodedColor.fB*0.0722f;
+			float fChromaR2 = 0.5f * ((a_frgbaDecodedColor.fR - fLuma2) * (1.0f / (1.0f - 0.2126f)));
+			float fChromaB2 = 0.5f * ((a_frgbaDecodedColor.fB - fLuma2) * (1.0f / (1.0f - 0.0722f)));
+
+			float fDeltaL = a_frgbaSourcePixel.fA * fLuma1 - a_fDecodedAlpha * fLuma2;
+			float fDeltaCr = a_frgbaSourcePixel.fA * fChromaR1 - a_fDecodedAlpha * fChromaR2;
+			float fDeltaCb = a_frgbaSourcePixel.fA * fChromaB1 - a_fDecodedAlpha * fChromaB2;
+
+			float fDAlpha = a_fDecodedAlpha - a_frgbaSourcePixel.fA;
+
+			// Favor Luma accuracy over Chroma, and Red over Blue
+			return LUMA_WEIGHT*fDeltaL*fDeltaL +
+					fDeltaCr*fDeltaCr +
+					CHROMA_BLUE_WEIGHT*fDeltaCb*fDeltaCb +
+					fDAlpha*fDAlpha;
+	#if 0
+			float fDRed = a_frgbaDecodedPixel.fR - a_frgbaSourcePixel.fR;
+			float fDGreen = a_frgbaDecodedPixel.fG - a_frgbaSourcePixel.fG;
+			float fDBlue = a_frgbaDecodedPixel.fB - a_frgbaSourcePixel.fB;
+			return 2.0f * 3.0f * fDeltaL * fDeltaL + fDRed*fDRed + fDGreen*fDGreen + fDBlue*fDBlue;
+#endif
+		}
+		else if (m_errormetric == ErrorMetric::NORMALXYZ)
+		{
+			float fDecodedX = 2.0f * a_frgbaDecodedColor.fR - 1.0f;
+			float fDecodedY = 2.0f * a_frgbaDecodedColor.fG - 1.0f;
+			float fDecodedZ = 2.0f * a_frgbaDecodedColor.fB - 1.0f;
+
+			float fDecodedLength = sqrtf(fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ);
+
+			// decoded vectors shorter than 0.5 (zero length included) are not normalized,
+			// they are scored with a fixed error of 1.0
+			//
+			// there used to be an "else if (fDecodedLength == 0.0f)" branch after this test
+			// that substituted the vector (1, 0, 0); it could never run because a length
+			// of 0.0 is already caught by the < 0.5 comparison
+			if (fDecodedLength < 0.5f)
+			{
+				return 1.0f;
+			}
+
+			// the length is known to be non-zero here, so the decoded vector can be
+			// normalized without a divide by zero check
+			fDecodedX /= fDecodedLength;
+			fDecodedY /= fDecodedLength;
+			fDecodedZ /= fDecodedLength;
+
+			float fSourceX = 2.0f * a_frgbaSourcePixel.fR - 1.0f;
+			float fSourceY = 2.0f * a_frgbaSourcePixel.fG - 1.0f;
+			float fSourceZ = 2.0f * a_frgbaSourcePixel.fB - 1.0f;
+
+			float fSourceLength = sqrtf(fSourceX*fSourceX + fSourceY*fSourceY + fSourceZ*fSourceZ);
+
+			if (fSourceLength == 0.0f)
+			{
+				fSourceX = 1.0f;
+				fSourceY = 0.0f;
+				fSourceZ = 0.0f;
+			}
+			else
+			{
+				fSourceX /= fSourceLength;
+				fSourceY /= fSourceLength;
+				fSourceZ /= fSourceLength;
+			}
+
+			float fDotProduct = fSourceX*fDecodedX + fSourceY*fDecodedY + fSourceZ*fDecodedZ;
+			float fNormalizedDotProduct = 1.0f - 0.5f * (fDotProduct + 1.0f);
+			float fDotProductError = fNormalizedDotProduct * fNormalizedDotProduct;
+
+			float fLength2 = fDecodedX*fDecodedX + fDecodedY*fDecodedY + fDecodedZ*fDecodedZ;
+			float fLength2Error = fabsf(1.0f - fLength2);
+
+			float fDeltaW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
+			float fErrorW = fDeltaW * fDeltaW;
+
+			return fDotProductError + fLength2Error + fErrorW;
+		}
+		else // ErrorMetric::NUMERIC
+		{
+			assert(a_fDecodedAlpha >= 0.0f);
+			// NOTE(review): fDW below uses a_frgbaDecodedColor.fA, not a_fDecodedAlpha - confirm intended
+			float fDX = a_frgbaDecodedColor.fR - a_frgbaSourcePixel.fR;
+			float fDY = a_frgbaDecodedColor.fG - a_frgbaSourcePixel.fG;
+			float fDZ = a_frgbaDecodedColor.fB - a_frgbaSourcePixel.fB;
+			float fDW = a_frgbaDecodedColor.fA - a_frgbaSourcePixel.fA;
+
+			return fDX*fDX + fDY*fDY + fDZ*fDZ + fDW*fDW;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
+
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding.h
new file mode 100644
index 0000000..d1fad23
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+#include "EtcErrorMetric.h"
+
+#include <assert.h>
+#include <float.h>
+
+namespace Etc
+{
+	class Block4x4;
+
+	// abstract base class for specific encodings
+	class Block4x4Encoding
+	{
+	public:
+
+		static const unsigned int ROWS = 4;
+		static const unsigned int COLUMNS = 4;
+		static const unsigned int PIXELS = ROWS * COLUMNS;
+		static constexpr float LUMA_WEIGHT = 3.0f;
+		static constexpr float CHROMA_BLUE_WEIGHT = 0.5f;
+
+		typedef enum
+		{
+			MODE_UNKNOWN,
+			//
+			MODE_ETC1,
+			MODE_T,
+			MODE_H,
+			MODE_PLANAR,
+			MODE_R11,
+			MODE_RG11,
+			//
+			MODES
+		} Mode;
+
+		Block4x4Encoding(void);
+		//virtual ~Block4x4Encoding(void) =0;
+		virtual ~Block4x4Encoding(void) {}
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+
+									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) = 0;
+
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+
+											ErrorMetric a_errormetric) = 0;
+
+		// perform an iteration of the encoding
+		// the first iteration must generate a complete, valid (if poor) encoding
+		virtual void PerformIteration(float a_fEffort) = 0;
+
+		void CalcBlockError(void);
+
+		inline float GetError(void)
+		{
+			assert(m_fError >= 0.0f);
+
+			return m_fError;
+		}
+
+		inline ColorFloatRGBA * GetDecodedColors(void)
+		{
+			return m_afrgbaDecodedColors;
+		}
+
+		inline float * GetDecodedAlphas(void)
+		{
+			return m_afDecodedAlphas;
+		}
+
+		virtual void SetEncodingBits(void) = 0;
+
+		virtual bool GetFlip(void) = 0;
+
+		virtual bool IsDifferential(void) = 0;
+
+		virtual bool HasSeverelyBentDifferentialColors(void) const = 0;
+
+		inline Mode GetMode(void)
+		{
+			return m_mode;
+		}
+
+		inline bool IsDone(void)
+		{
+			return m_boolDone;
+		}
+
+		inline void SetDoneIfPerfect()
+		{
+			if (GetError() == 0.0f)
+			{
+				m_boolDone = true;
+			}
+		}
+
+		float CalcPixelError(ColorFloatRGBA a_frgbaDecodedColor, float a_fDecodedAlpha,
+								ColorFloatRGBA a_frgbaSourcePixel);
+
+	protected:
+
+		void Init(Block4x4 *a_pblockParent,
+					ColorFloatRGBA *a_pafrgbaSource,
+
+					ErrorMetric a_errormetric);
+
+		Block4x4		*m_pblockParent;
+		ColorFloatRGBA	*m_pafrgbaSource;
+
+		bool			m_boolBorderPixels;				// if block has any border pixels
+
+		ColorFloatRGBA	m_afrgbaDecodedColors[PIXELS];	// decoded RGB components, ignore Alpha
+		float			m_afDecodedAlphas[PIXELS];		// decoded alpha component
+		float			m_fError;						// error for RGBA relative to m_pafrgbaSource
+
+		// intermediate encoding
+		Mode			m_mode;
+
+		unsigned int	m_uiEncodingIterations;
+		bool			m_boolDone;						// all iterations have been done
+		ErrorMetric		m_errormetric;
+
+	private:
+
+	};
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h b/EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h
new file mode 100644
index 0000000..5ba879e
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4EncodingBits.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ################################################################################
+	// Block4x4EncodingBits
+	// Base class for Block4x4EncodingBits_XXXX
+	// ################################################################################
+
+	class Block4x4EncodingBits
+	{
+	public:
+
+		// the encoded block layouts that GetBytesPerBlock() can be asked about
+		enum class Format
+		{
+			UNKNOWN,
+			//
+			RGB8,
+			RGBA8,
+			R11,
+			RG11,
+			RGB8A1,
+			//
+			FORMATS
+		};
+
+		// number of bytes used to store one encoded 4x4 block in a_format
+		// returns 0 for UNKNOWN, FORMATS or any other unrecognized value
+		static unsigned int GetBytesPerBlock(Format a_format)
+		{
+			unsigned int uiBytes = 0;
+
+			if (a_format == Format::RGB8 ||
+				a_format == Format::R11 ||
+				a_format == Format::RGB8A1)
+			{
+				uiBytes = 8;
+			}
+			else if (a_format == Format::RGBA8 ||
+						a_format == Format::RG11)
+			{
+				uiBytes = 16;
+			}
+
+			return uiBytes;
+		}
+
+	};
+
+	// ################################################################################
+	// Block4x4EncodingBits_RGB8
+	// Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8
+	// ################################################################################
+
+	class Block4x4EncodingBits_RGB8
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 8;
+
+		inline Block4x4EncodingBits_RGB8(void)
+		{
+			assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK);
+
+			for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++)
+			{
+				auc[uiByte] = 0;
+			}
+
+		}
+
+		typedef struct
+		{
+			unsigned red2 : 4;
+			unsigned red1 : 4;
+			//
+			unsigned green2 : 4;
+			unsigned green1 : 4;
+			//
+			unsigned blue2 : 4;
+			unsigned blue1 : 4;
+			//
+			unsigned flip : 1;
+			unsigned diff : 1;
+			unsigned cw2 : 3;
+			unsigned cw1 : 3;
+			//
+			unsigned int selectors;
+		} Individual;
+
+		typedef struct
+		{
+			signed dred2 : 3;
+			unsigned red1 : 5;
+			//
+			signed dgreen2 : 3;
+			unsigned green1 : 5;
+			//
+			signed dblue2 : 3;
+			unsigned blue1 : 5;
+			//
+			unsigned flip : 1;
+			unsigned diff : 1;
+			unsigned cw2 : 3;
+			unsigned cw1 : 3;
+			//
+			unsigned int selectors;
+		} Differential;
+
+		typedef struct
+		{
+			unsigned red1b : 2;
+			unsigned detect2 : 1;
+			unsigned red1a : 2;
+			unsigned detect1 : 3;
+			//
+			unsigned blue1 : 4;
+			unsigned green1 : 4;
+			//
+			unsigned green2 : 4;
+			unsigned red2 : 4;
+			//
+			unsigned db : 1;
+			unsigned diff : 1;
+			unsigned da : 2;
+			unsigned blue2 : 4;
+			//
+			unsigned int selectors;
+		} T;
+
+		typedef struct
+		{
+			unsigned green1a : 3;
+			unsigned red1 : 4;
+			unsigned detect1 : 1;
+			//
+			unsigned blue1b : 2;
+			unsigned detect3 : 1;
+			unsigned blue1a : 1;
+			unsigned green1b : 1;
+			unsigned detect2 : 3;
+			//
+			unsigned green2a : 3;
+			unsigned red2 : 4;
+			unsigned blue1c : 1;
+			//
+			unsigned db : 1;
+			unsigned diff : 1;
+			unsigned da : 1;
+			unsigned blue2 : 4;
+			unsigned green2b : 1;
+			//
+			unsigned int selectors;
+		} H;
+
+		typedef struct
+		{
+			unsigned originGreen1 : 1;
+			unsigned originRed : 6;
+			unsigned detect1 : 1;
+			//
+			unsigned originBlue1 : 1;
+			unsigned originGreen2 : 6;
+			unsigned detect2 : 1;
+			//
+			unsigned originBlue3 : 2;
+			unsigned detect4 : 1;
+			unsigned originBlue2 : 2;
+			unsigned detect3 : 3;
+			//
+			unsigned horizRed2 : 1;
+			unsigned diff : 1;
+			unsigned horizRed1 : 5;
+			unsigned originBlue4 : 1;
+			//
+			unsigned horizBlue1: 1;
+			unsigned horizGreen : 7;
+			//
+			unsigned vertRed1 : 3;
+			unsigned horizBlue2 : 5;
+			//
+			unsigned vertGreen1 : 5;
+			unsigned vertRed2 : 3;
+			//
+			unsigned vertBlue : 6;
+			unsigned vertGreen2 : 2;
+		} Planar;
+
+		union
+		{
+			unsigned char auc[BYTES_PER_BLOCK];
+			unsigned long int ul;
+			Individual individual;
+			Differential differential;
+			T t;
+			H h;
+			Planar planar;
+		};
+
+	};
+
+	// ################################################################################
+	// Block4x4EncodingBits_A8
+	// Encoding bits for the A portion of RGBA8
+	// ################################################################################
+
+	class Block4x4EncodingBits_A8
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 8;
+		static const unsigned int SELECTOR_BYTES = 6;
+
+		typedef struct
+		{
+			unsigned base : 8;
+			unsigned table : 4;
+			unsigned multiplier : 4;
+			unsigned selectors0 : 8;
+			unsigned selectors1 : 8;
+			unsigned selectors2 : 8;
+			unsigned selectors3 : 8;
+			unsigned selectors4 : 8;
+			unsigned selectors5 : 8;
+		} Data;
+
+		Data data;
+
+	};
+
+	// ################################################################################
+	// Block4x4EncodingBits_R11
+	// Encoding bits for the R portion of R11
+	// ################################################################################
+
+	class Block4x4EncodingBits_R11
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 8;
+		static const unsigned int SELECTOR_BYTES = 6;
+
+		typedef struct
+		{
+			unsigned base : 8;
+			unsigned table : 4;
+			unsigned multiplier : 4;
+			unsigned selectors0 : 8;
+			unsigned selectors1 : 8;
+			unsigned selectors2 : 8;
+			unsigned selectors3 : 8;
+			unsigned selectors4 : 8;
+			unsigned selectors5 : 8;
+		} Data;
+
+		Data data;
+
+	};
+
+	class Block4x4EncodingBits_RG11
+	{
+	public:
+
+		static const unsigned int BYTES_PER_BLOCK = 16;
+		static const unsigned int SELECTOR_BYTES = 12;
+
+		typedef struct
+		{
+			//Red portion
+			unsigned baseR : 8;
+			unsigned tableIndexR : 4;
+			unsigned multiplierR : 4;
+			unsigned selectorsR0 : 8;
+			unsigned selectorsR1 : 8;
+			unsigned selectorsR2 : 8;
+			unsigned selectorsR3 : 8;
+			unsigned selectorsR4 : 8;
+			unsigned selectorsR5 : 8;
+			//Green portion
+			unsigned baseG : 8;
+			unsigned tableIndexG : 4;
+			unsigned multiplierG : 4;
+			unsigned selectorsG0 : 8;
+			unsigned selectorsG1 : 8;
+			unsigned selectorsG2 : 8;
+			unsigned selectorsG3 : 8;
+			unsigned selectorsG4 : 8;
+			unsigned selectorsG5 : 8;
+		} Data;
+
+		Data data;
+
+	};
+
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp
new file mode 100644
index 0000000..7aa2de3
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.cpp
@@ -0,0 +1,1280 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_ETC1.cpp
+
+Block4x4Encoding_ETC1 is the encoder to use when targeting file format ETC1.  This encoder is also
+used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_ETC1.h"
+
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcDifferentialTrys.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+
+	// pixel processing order if the flip bit = 0 (horizontal split): identity order, left half pixels then right half pixels
+	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+	// pixel processing order if the flip bit = 1 (vertical split): top half pixels then bottom half pixels
+	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 };
+
+	// pixel processing order for horizontal scan (ETC normally does a vertical scan): entry [4*a + b] is 4*b + a
+	const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+	// pixel indices for different block halves (8 pixels per half; left/right split at pixel 8, top/bottom by index % 4 < 2)
+	const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+	const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
+	const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 };
+	const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 };
+
+	// CW ranges that the ETC1 decoders use
+	// CW is basically a contrast for the different selector bits, since these values are offsets to the base color
+	// the first axis in the array is indexed by the CW in the encoding bits
+	// the second axis in the array is indexed by the selector bits: { +small, +large, -small, -large }, each given as n / 255
+	float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] =
+	{
+		{ 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f },
+		{ 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f },
+		{ 9.0f / 255.0f, 29.0f / 255.0f, -9.0f / 255.0f, -29.0f / 255.0f },
+		{ 13.0f / 255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f },
+		{ 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f },
+		{ 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f },
+		{ 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f },
+		{ 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	// default constructor: start from a neutral MODE_ETC1 state
+	// flags are cleared, both base colors are default constructed, CWs and selectors are zeroed,
+	// every decoded alpha is 1.0f and the error members are -1.0f (presumably meaning "not calculated yet")
+	//
+	Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void)
+	{
+		// encoding parameters
+		m_mode = MODE_ETC1;
+		m_boolDiff = false;
+		m_boolFlip = false;
+		m_frgbaColor1 = ColorFloatRGBA();
+		m_frgbaColor2 = ColorFloatRGBA();
+		m_uiCW1 = 0;
+		m_uiCW2 = 0;
+
+		// per-pixel state (selectors and decoded alphas are initialized in a single pass)
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = 0;
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+		}
+
+		// search bookkeeping
+		m_boolMostLikelyFlip = false;
+		m_boolSeverelyBentDifferentialColors = false;
+		// error of the whole block, then of the first and second half
+		m_fError = -1.0f;
+		m_fError1 = -1.0f;
+		m_fError2 = -1.0f;
+	}
+
+	 Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {}	// empty destructor body: no cleanup is performed here
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits (kept as a Block4x4EncodingBits_RGB8 pointer)
+	// a_errormetric is used to choose the best encoding
+	//
+	void Block4x4Encoding_ETC1::InitFromSource(Block4x4 *a_pblockParent,
+												ColorFloatRGBA *a_pafrgbaSource,
+												unsigned char *a_paucEncodingBits,
+												ErrorMetric a_errormetric)
+	{
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+
+		// remember where the encoding bits go and reset the block error to -1.0f
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(a_paucEncodingBits);
+		m_fError = -1.0f;
+
+		// every decoded alpha is forced to 1.0f
+		for (unsigned int uiIndex = 0; uiIndex != PIXELS; ++uiIndex)
+		{
+			m_afDecodedAlphas[uiIndex] = 1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_errormetric is used to choose the best encoding
+	//
+	// the diff/flip flags, both base colors, both CWs and the selectors are read back from the bits,
+	// then the block is decoded and its error is recalculated
+	//
+	void Block4x4Encoding_ETC1::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+
+		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
+		m_fError = -1.0f;
+		m_mode = MODE_ETC1;
+
+		// the diff and flip bits are read through the "individual" view of the bits
+		m_boolDiff = m_pencodingbitsRGB8->individual.diff;
+		m_boolFlip = m_pencodingbitsRGB8->individual.flip;
+
+		if (!m_boolDiff)
+		{
+			// individual mode: both base colors are stored directly and converted with ConvertFromRGB4
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red1,
+															m_pencodingbitsRGB8->individual.green1,
+															m_pencodingbitsRGB8->individual.blue1);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red2,
+															m_pencodingbitsRGB8->individual.green2,
+															m_pencodingbitsRGB8->individual.blue2);
+		}
+		else
+		{
+			// differential mode: each component of color 2 is the component of color 1 plus a stored delta
+			int aiComponent2[3];
+			aiComponent2[0] = (int)(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2);
+			aiComponent2[1] = (int)(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2);
+			aiComponent2[2] = (int)(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2);
+
+			// keep every component of color 2 inside [0, 31]
+			for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++)
+			{
+				if (aiComponent2[uiComponent] < 0)
+				{
+					aiComponent2[uiComponent] = 0;
+				}
+				else if (aiComponent2[uiComponent] > 31)
+				{
+					aiComponent2[uiComponent] = 31;
+				}
+			}
+
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1,
+															m_pencodingbitsRGB8->differential.green1,
+															m_pencodingbitsRGB8->differential.blue1);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)aiComponent2[0],
+															(unsigned char)aiComponent2[1],
+															(unsigned char)aiComponent2[2]);
+		}
+
+		m_uiCW1 = m_pencodingbitsRGB8->individual.cw1;
+		m_uiCW2 = m_pencodingbitsRGB8->individual.cw2;
+
+		// unpack the selectors, then rebuild the decoded pixels and the block error
+		InitFromEncodingBits_Selectors();
+		Decode();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// init the selectors from a prior encoding
+	// each pixel has a 2 bit selector: selector bytes 0-1 supply the high bits, bytes 2-3 the low bits
+	// in both byte pairs the first byte covers pixels 8-15 and the second byte covers pixels 0-7
+	//
+	void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void)
+	{
+		const unsigned char *paucBytes = (const unsigned char *)&m_pencodingbitsRGB8->individual.selectors;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; ++uiPixel)
+		{
+			const unsigned int uiByteGroup = uiPixel >> 3;	// 0 for pixels 0-7, 1 for pixels 8-15
+			const unsigned int uiBit = uiPixel % 8;
+
+			const unsigned int uiHighBit = (paucBytes[1 - uiByteGroup] >> uiBit) & 1u;
+			const unsigned int uiLowBit = (paucBytes[3 - uiByteGroup] >> uiBit) & 1u;
+
+			m_auiSelectors[uiPixel] = (uiHighBit << 1) | uiLowBit;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	// schedule, selected by m_uiEncodingIterations (finished when a_fEffort <= the listed cutoff):
+	//		0: PerformFirstIteration()								no cutoff
+	//		1: TryDifferential(), most likely flip, radius 1		no cutoff
+	//		2: TryIndividual(), most likely flip, radius 1			49.5
+	//		3: TryDifferential(), opposite flip, radius 1			59.5
+	//		4: TryIndividual(), opposite flip, radius 1				69.5
+	//		5: TryDegenerates1()									79.5
+	//		6: TryDegenerates2()									89.5
+	//		7: TryDegenerates3()									99.5
+	//		8: TryDegenerates4()									always finished
+	//
+	void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// effort cutoffs of iterations 2 through 7
+		static const float s_afEffortCutoff[6] = { 49.5f, 59.5f, 69.5f, 79.5f, 89.5f, 99.5f };
+		const unsigned int uiIteration = m_uiEncodingIterations;
+
+		switch (uiIteration)
+		{
+		case 0:
+			PerformFirstIteration();
+			break;
+
+		case 1:
+			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			TryIndividual(m_boolMostLikelyFlip, 1);
+			break;
+
+		case 3:
+			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 4:
+			TryIndividual(!m_boolMostLikelyFlip, 1);
+			break;
+
+		case 5:
+			TryDegenerates1();
+			break;
+
+		case 6:
+			TryDegenerates2();
+			break;
+
+		case 7:
+			TryDegenerates3();
+			break;
+
+		case 8:
+			TryDegenerates4();
+			m_boolDone = true;
+			break;
+
+		default:
+			assert(0);
+			break;
+		}
+
+		// iterations 2 through 7 end the encoding when the requested effort is at or below their cutoff
+		if (uiIteration >= 2 &&
+			uiIteration <= 7 &&
+			a_fEffort <= s_afEffortCutoff[uiIteration - 2])
+		{
+			m_boolDone = true;
+		}
+
+		// count this iteration, then give SetDoneIfPerfect() its chance to end the search
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best initial encoding to ensure block has a valid encoding
+	// radius 0 tries, in order: differential then individual with the most likely flip,
+	// followed by differential then individual with the opposite flip
+	// the sequence ends early once m_boolDone is set (checked after each SetDoneIfPerfect())
+	//
+	void Block4x4Encoding_ETC1::PerformFirstIteration(void)
+	{
+		CalculateMostLikelyFlip();
+
+		// start from the largest float so that a try with a smaller error replaces it
+		m_fError = FLT_MAX;
+
+		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
+		SetDoneIfPerfect();
+		if (!m_boolDone)
+		{
+			TryIndividual(m_boolMostLikelyFlip, 0);
+			SetDoneIfPerfect();
+			if (!m_boolDone)
+			{
+				TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
+				SetDoneIfPerfect();
+				if (!m_boolDone)
+				{
+					TryIndividual(!m_boolMostLikelyFlip, 0);
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// algorithm:
+	// create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half
+	// note: the "gray line" is the line of equal delta RGB that goes thru the average color
+	// for each half:
+	//		see how close each of the 8 pixels are to the "gray line" that goes thru the source average color
+	//		create an error value that is the sum of the distances from the gray line
+	// h_error is the sum of Left and Right errors
+	// v_error is the sum of Top and Bottom errors
+	// m_boolMostLikelyFlip is set when v_error is strictly smaller than h_error
+	//
+	void Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void)
+	{
+		static const bool DEBUG_PRINT = false;
+		static const unsigned int PIXELS_PER_HALF = 8;
+
+		CalculateSourceAverages();
+
+		// one running gray line error per half
+		float fErrorLeft = 0.0f;
+		float fErrorRight = 0.0f;
+		float fErrorTop = 0.0f;
+		float fErrorBottom = 0.0f;
+
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS_PER_HALF; uiIndex++)
+		{
+			// the left half is pixels 0-7 and the right half is pixels 8-15; top and bottom need their mapping tables
+			ColorFloatRGBA &frgbaLeft = m_pafrgbaSource[uiIndex];
+			ColorFloatRGBA &frgbaRight = m_pafrgbaSource[uiIndex + 8];
+			ColorFloatRGBA &frgbaTop = m_pafrgbaSource[s_auiTopPixelMapping[uiIndex]];
+			ColorFloatRGBA &frgbaBottom = m_pafrgbaSource[s_auiBottomPixelMapping[uiIndex]];
+
+			fErrorLeft += CalcGrayDistance2(frgbaLeft, m_frgbaSourceAverageLeft);
+			fErrorRight += CalcGrayDistance2(frgbaRight, m_frgbaSourceAverageRight);
+			fErrorTop += CalcGrayDistance2(frgbaTop, m_frgbaSourceAverageTop);
+			fErrorBottom += CalcGrayDistance2(frgbaBottom, m_frgbaSourceAverageBottom);
+		}
+
+		if (DEBUG_PRINT)
+		{
+			printf("\n%.2f %.2f\n", fErrorLeft + fErrorRight, fErrorTop + fErrorBottom);
+		}
+
+		// flip when the top/bottom split has the smaller total error
+		m_boolMostLikelyFlip = (fErrorTop + fErrorBottom) < (fErrorLeft + fErrorRight);
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate source pixel sums for each 2x2 quadrant (UL, LL, UR, LR) in a 4x4 block
+	// these are used to determine the averages for each of the 4 different halves (left, right, top, bottom)
+	// pixels that have alpha == NAN (these are border pixels outside of the source image) get weight 0.0f
+	// opaque blocks use a plain average; otherwise the averages are weighted by each pixel's alpha
+	//
+	void Block4x4Encoding_ETC1::CalculateSourceAverages(void)
+	{
+		static const bool DEBUG_PRINT = false;	// set to true to printf the four half averages
+
+
+		if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE)	// unweighted path
+		{
+			ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5];
+			ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7];
+			ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13];
+			ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15];
+
+			m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f;	// two quadrants = 8 pixels, hence 1/8
+			m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f;
+			m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f;
+			m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f;
+		}
+		else
+		{
+			float afSourceAlpha[PIXELS];	// per pixel weight
+
+			// treat alpha NAN as 0.0f
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
+																		0.0f : 
+																		m_pafrgbaSource[uiPixel].fA;
+			}
+
+			ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS];	// source color multiplied by its weight
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel];
+			}
+
+			ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] +	// weighted color sums, one per quadrant
+										afrgbaAlphaWeightedSource[1] +
+										afrgbaAlphaWeightedSource[4] +
+										afrgbaAlphaWeightedSource[5];
+
+			ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] +
+										afrgbaAlphaWeightedSource[3] +
+										afrgbaAlphaWeightedSource[6] +
+										afrgbaAlphaWeightedSource[7];
+
+			ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] +
+										afrgbaAlphaWeightedSource[9] +
+										afrgbaAlphaWeightedSource[12] +
+										afrgbaAlphaWeightedSource[13];
+
+			ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] +
+										afrgbaAlphaWeightedSource[11] +
+										afrgbaAlphaWeightedSource[14] +
+										afrgbaAlphaWeightedSource[15];
+
+			float fWeightSumUL = afSourceAlpha[0] +	// weight sums over the same pixels as the color sums above
+									afSourceAlpha[1] +
+									afSourceAlpha[4] +
+									afSourceAlpha[5];
+
+			float fWeightSumLL = afSourceAlpha[2] +
+									afSourceAlpha[3] +
+									afSourceAlpha[6] +
+									afSourceAlpha[7];
+
+			float fWeightSumUR = afSourceAlpha[8] +
+									afSourceAlpha[9] +
+									afSourceAlpha[12] +
+									afSourceAlpha[13];
+
+			float fWeightSumLR = afSourceAlpha[10] +
+									afSourceAlpha[11] +
+									afSourceAlpha[14] +
+									afSourceAlpha[15];
+
+			ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL;	// each half is the sum of two quadrants
+			ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR;
+			ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR;
+			ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR;
+
+			float fWeightSumLeft = fWeightSumUL + fWeightSumLL;
+			float fWeightSumRight = fWeightSumUR + fWeightSumLR;
+			float fWeightSumTop = fWeightSumUL + fWeightSumUR;
+			float fWeightSumBottom = fWeightSumLL + fWeightSumLR;
+
+			// check to see if there is at least 1 pixel with  non-zero alpha
+			// completely transparent block should not make it to this code
+			assert((fWeightSumLeft + fWeightSumRight) > 0.0f);
+			assert((fWeightSumTop + fWeightSumBottom) > 0.0f);
+
+			if (fWeightSumLeft > 0.0f)	// first pass: halves with some weight get weighted sum / total weight
+			{
+				m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft);
+			}
+			if (fWeightSumRight > 0.0f)
+			{
+				m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight);
+			}
+			if (fWeightSumTop > 0.0f)
+			{
+				m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop);
+			}
+			if (fWeightSumBottom > 0.0f)
+			{
+				m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom);
+			}
+
+			if (fWeightSumLeft == 0.0f)	// second pass: a half with zero weight copies the average of the opposite half
+			{
+				assert(fWeightSumRight > 0.0f);
+				m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight;
+			}
+			if (fWeightSumRight == 0.0f)
+			{
+				assert(fWeightSumLeft > 0.0f);
+				m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft;
+			}
+			if (fWeightSumTop == 0.0f)
+			{
+				assert(fWeightSumBottom > 0.0f);
+				m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom;
+			}
+			if (fWeightSumBottom == 0.0f)
+			{
+				assert(fWeightSumTop > 0.0f);
+				m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop;
+			}
+		}
+
+		
+
+		if (DEBUG_PRINT)	// prints RGB of the left, right, top and bottom averages, in that order
+		{
+			printf("\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n",
+				m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB,
+				m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB,
+				m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB,
+				m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 differential mode encoding: both halves are searched, then the best pair of tries whose
+	// 5 bit base colors differ by [-4, 3] in every component is selected
+	// use a_boolFlip to set the encoding F bit (true: top/bottom halves, false: left/right halves)
+	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
+	// use a_iGrayOffset1 and a_iGrayOffset2 to offset the basecolor to search for degenerate encodings
+	// replace the encoding if the encoding error is less than previous encoding
+	void Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
+												int a_iGrayOffset1, int a_iGrayOffset2)
+	{
+
+		ColorFloatRGBA frgbaColor1;	// source average of the first half
+		ColorFloatRGBA frgbaColor2;	// source average of the second half
+
+		const unsigned int *pauiPixelMapping1;	// pixel indices of the first half
+		const unsigned int *pauiPixelMapping2;	// pixel indices of the second half
+
+		if (a_boolFlip)
+		{
+			frgbaColor1 = m_frgbaSourceAverageTop;
+			frgbaColor2 = m_frgbaSourceAverageBottom;
+
+			pauiPixelMapping1 = s_auiTopPixelMapping;
+			pauiPixelMapping2 = s_auiBottomPixelMapping;
+		}
+		else
+		{
+			frgbaColor1 = m_frgbaSourceAverageLeft;
+			frgbaColor2 = m_frgbaSourceAverageRight;
+
+			pauiPixelMapping1 = s_auiLeftPixelMapping;
+			pauiPixelMapping2 = s_auiRightPixelMapping;
+		}
+
+		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
+								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
+
+		Block4x4Encoding_ETC1 encodingTry = *this;	// the halves are searched on a copy of this encoding
+		encodingTry.m_boolFlip = a_boolFlip;
+
+		encodingTry.TryDifferentialHalf(&trys.m_half1);	// fills m_atry, m_uiTrys and m_ptryBest of each half
+		encodingTry.TryDifferentialHalf(&trys.m_half2);
+
+		// find best halves that are within differential range
+		DifferentialTrys::Try *ptryBest1 = nullptr;
+		DifferentialTrys::Try *ptryBest2 = nullptr;
+		encodingTry.m_fError = FLT_MAX;
+
+		// see if the best of each half are in differential range
+		// NOTE(review): m_ptryBest is dereferenced without a null check -- confirm TryDifferentialHalf always sets it
+		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
+		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
+		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
+		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
+		{
+			ptryBest1 = trys.m_half1.m_ptryBest;
+			ptryBest2 = trys.m_half2.m_ptryBest;
+			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
+		}
+		else
+		{
+			// else, find the next best halves that are in differential range
+			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];	// every recorded try of half 1 ...
+			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
+				ptry1++)
+			{
+				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];	// ... against every recorded try of half 2
+				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
+					ptry2++)
+				{
+					iDRed = ptry2->m_iRed - ptry1->m_iRed;
+					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
+					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
+					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
+					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
+					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
+
+					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
+					{
+						float fError = ptry1->m_fError + ptry2->m_fError;
+
+						if (fError < encodingTry.m_fError)	// strict: the first pair found wins a tie
+						{
+							encodingTry.m_fError = fError;
+
+							ptryBest1 = ptry1;
+							ptryBest2 = ptry2;
+						}
+					}
+
+				}
+			}
+			assert(encodingTry.m_fError < FLT_MAX);	// at least one in-range pair is expected to exist
+			assert(ptryBest1 != nullptr);
+			assert(ptryBest2 != nullptr);
+		}
+
+		if (encodingTry.m_fError < m_fError)	// adopt the candidate only if strictly better than the current encoding
+		{
+			m_mode = MODE_ETC1;
+			m_boolDiff = true;
+			m_boolFlip = encodingTry.m_boolFlip;
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
+			m_uiCW1 = ptryBest1->m_uiCW;
+			m_uiCW2 = ptryBest2->m_uiCW;
+
+			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)	// one pixel of each half per pass
+			{
+				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
+				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
+
+				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
+				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
+
+				m_auiSelectors[uiPixel1] = uiSelector1;
+				m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder];	// same value as uiSelector2
+
+				float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1];
+				float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2];
+
+				m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
+				m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
+			}
+
+			m_fError1 = ptryBest1->m_fError;
+			m_fError2 = ptryBest2->m_fError;
+			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
+			m_fError = m_fError1 + m_fError2;
+
+			// sanity check: re-derive the 5 bit components from the stored colors and assert the deltas are in [-4, 3]
+			{
+				int iRed1 = m_frgbaColor1.IntRed(31.0f);
+				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
+				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
+
+				int iRed2 = m_frgbaColor2.IntRed(31.0f);
+				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
+				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
+
+				iDRed = iRed2 - iRed1;
+				iDGreen = iGreen2 - iGreen1;
+				iDBlue = iBlue2 - iBlue1;
+
+				assert(iDRed >= -4 && iDRed < 4);
+				assert(iDGreen >= -4 && iDGreen < 4);
+				assert(iDBlue >= -4 && iDBlue < 4);
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 differential mode encoding for a half of a 4x4 block
+	// vary the basecolor components using a radius: every 5 bit RGB within +/- m_uiRadius of the half's base
+	// color is recorded in m_atry (best CW, selectors, error); m_ptryBest is set to the lowest error try
+	void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
+	{
+
+		a_phalf->m_ptryBest = nullptr;
+		float fBestTryError = FLT_MAX;
+
+		a_phalf->m_uiTrys = 0;
+		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
+				iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
+				iRed++)
+		{
+			assert(iRed >= 0 && iRed <= 31);	// the half's base color and radius must keep the search inside 5 bits
+
+			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
+					iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
+					iGreen++)
+			{
+				assert(iGreen >= 0 && iGreen <= 31);
+
+				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
+						iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
+						iBlue++)
+				{
+					assert(iBlue >= 0 && iBlue <= 31);
+
+					// next free slot of the half's try array
+					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
+					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
+
+					ptry->m_iRed = iRed;
+					ptry->m_iGreen = iGreen;
+					ptry->m_iBlue = iBlue;
+					ptry->m_fError = FLT_MAX;
+					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
+
+					// try each CW
+					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+					{
+						unsigned int auiPixelSelectors[PIXELS / 2];
+						// best selector found so far for each pixel of this half, and the error it gives
+						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+															FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+
+						// pre-compute decoded pixels for each selector
+						ColorFloatRGBA afrgbaSelectors[SELECTORS];
+						assert(SELECTORS == 4);
+						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
+						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
+						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
+						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
+
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
+							ColorFloatRGBA frgbaDecodedPixel;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
+
+								float fPixelError;
+
+								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
+																	*pfrgbaSourcePixel);
+
+								if (fPixelError < afPixelErrors[uiPixel])
+								{
+									// only the selector and its error are read later, so the decoded color is not stored
+									auiPixelSelectors[uiPixel] = uiSelector;
+									afPixelErrors[uiPixel] = fPixelError;
+								}
+
+							}
+						}
+
+						// add up all pixel errors
+						float fCWError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							fCWError += afPixelErrors[uiPixel];
+						}
+
+						// if best CW so far
+						if (fCWError < ptry->m_fError)
+						{
+							ptry->m_uiCW = uiCW;
+							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+							{
+								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
+							}
+							ptry->m_fError = fCWError;
+						}
+
+					}
+
+					if (ptry->m_fError < fBestTryError)	// strict: the earliest try wins a tie
+					{
+						a_phalf->m_ptryBest = ptry;
+						fBestTryError = ptry->m_fError;
+					}
+
+					assert(ptry->m_fError < FLT_MAX);	// some CW must have produced a finite total error
+
+					a_phalf->m_uiTrys++;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 individual mode encoding
+	// use a_boolFlip to set the encoding F bit
+	// use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius]
+	// replace the encoding if the encoding error is less than previous encoding
+	//
+	// the two halves are searched independently (TryIndividualHalf), each starting from the source
+	// average of that half; the best try of each half is then combined without any cross-half check
+	// the current encoding is left untouched unless the combined error is strictly smaller
+	//
+	void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius)
+	{
+
+		// flipped: the halves are top and bottom, otherwise left and right
+		const unsigned int *pauiMappingA = a_boolFlip ? s_auiTopPixelMapping : s_auiLeftPixelMapping;
+		const unsigned int *pauiMappingB = a_boolFlip ? s_auiBottomPixelMapping : s_auiRightPixelMapping;
+
+		ColorFloatRGBA frgbaAverageA = a_boolFlip ? m_frgbaSourceAverageTop : m_frgbaSourceAverageLeft;
+		ColorFloatRGBA frgbaAverageB = a_boolFlip ? m_frgbaSourceAverageBottom : m_frgbaSourceAverageRight;
+
+		IndividualTrys trys(frgbaAverageA, frgbaAverageB, pauiMappingA, pauiMappingB, a_uiRadius);
+
+		// the halves are searched on a copy of this encoding
+		Block4x4Encoding_ETC1 encodingTry = *this;
+		encodingTry.m_boolFlip = a_boolFlip;
+
+		encodingTry.TryIndividualHalf(&trys.m_half1);
+		encodingTry.TryIndividualHalf(&trys.m_half2);
+
+		// use the best of each half
+		IndividualTrys::Try *ptryHalf1 = trys.m_half1.m_ptryBest;
+		IndividualTrys::Try *ptryHalf2 = trys.m_half2.m_ptryBest;
+		encodingTry.m_fError = ptryHalf1->m_fError + ptryHalf2->m_fError;
+
+		if (!(encodingTry.m_fError < m_fError))
+		{
+			// not an improvement: keep the current encoding
+			return;
+		}
+
+		// adopt the new encoding
+		m_mode = MODE_ETC1;
+		m_boolDiff = false;
+		m_boolFlip = encodingTry.m_boolFlip;
+
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryHalf1->m_iRed,
+														(unsigned char)ptryHalf1->m_iGreen,
+														(unsigned char)ptryHalf1->m_iBlue);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryHalf2->m_iRed,
+														(unsigned char)ptryHalf2->m_iGreen,
+														(unsigned char)ptryHalf2->m_iBlue);
+
+		m_uiCW1 = ptryHalf1->m_uiCW;
+		m_uiCW2 = ptryHalf2->m_uiCW;
+
+		// copy the selectors and rebuild the decoded colors, one pixel of each half per pass
+		for (unsigned int uiSlot = 0; uiSlot < PIXELS / 2; uiSlot++)
+		{
+			const unsigned int uiPixelA = pauiMappingA[uiSlot];
+			const unsigned int uiPixelB = pauiMappingB[uiSlot];
+
+			const unsigned int uiSelectorA = ptryHalf1->m_auiSelectors[uiSlot];
+			const unsigned int uiSelectorB = ptryHalf2->m_auiSelectors[uiSlot];
+
+			m_auiSelectors[uiPixelA] = uiSelectorA;
+			m_auiSelectors[uiPixelB] = uiSelectorB;
+
+			const float fDeltaA = s_aafCwTable[m_uiCW1][uiSelectorA];
+			const float fDeltaB = s_aafCwTable[m_uiCW2][uiSelectorB];
+
+			m_afrgbaDecodedColors[uiPixelA] = (m_frgbaColor1 + fDeltaA).ClampRGB();
+			m_afrgbaDecodedColors[uiPixelB] = (m_frgbaColor2 + fDeltaB).ClampRGB();
+		}
+
+		// error of each half and of the whole block
+		m_fError1 = ptryHalf1->m_fError;
+		m_fError2 = ptryHalf2->m_fError;
+		m_fError = m_fError1 + m_fError2;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try an ETC1 individual mode encoding for a half of a 4x4 block
+	// vary the basecolor components using a radius
+	//
+	void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf)
+	{
+		// a_phalf supplies the center color (m_iRed/m_iGreen/m_iBlue), the search radius and the pixel mapping
+		a_phalf->m_ptryBest = nullptr;
+		float fBestTryError = FLT_MAX;
+
+		a_phalf->m_uiTrys = 0;
+		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
+			iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
+			iRed++)
+		{
+			assert(iRed >= 0 && iRed <= 15);
+
+			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
+				iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
+				iGreen++)
+			{
+				assert(iGreen >= 0 && iGreen <= 15);
+
+				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
+					iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
+					iBlue++)
+				{
+					assert(iBlue >= 0 && iBlue <= 15);
+					// each (red, green, blue) candidate gets its own slot in a_phalf->m_atry
+					IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
+					assert(ptry < &a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]);
+
+					ptry->m_iRed = iRed;
+					ptry->m_iGreen = iGreen;
+					ptry->m_iBlue = iBlue;
+					ptry->m_fError = FLT_MAX;
+					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
+
+					// try each CW and remember the one with the lowest summed pixel error for this candidate
+					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+					{
+						unsigned int auiPixelSelectors[PIXELS / 2];
+						ColorFloatRGBA	afrgbaDecodedPixels[PIXELS / 2];	// NOTE(review): written below but never read in this function
+						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+															FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+
+						// pre-compute decoded pixels for each selector
+						ColorFloatRGBA afrgbaSelectors[SELECTORS];
+						assert(SELECTORS == 4);
+						afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB();
+						afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB();
+						afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB();
+						afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB();
+						// for every pixel of this half, keep the selector with the smallest error
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
+							ColorFloatRGBA frgbaDecodedPixel;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
+
+								float fPixelError;
+
+								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
+										*pfrgbaSourcePixel);
+
+								if (fPixelError < afPixelErrors[uiPixel])
+								{
+									auiPixelSelectors[uiPixel] = uiSelector;
+									afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel;
+									afPixelErrors[uiPixel] = fPixelError;
+								}
+
+							}
+						}
+
+						// add up all pixel errors
+						float fCWError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							fCWError += afPixelErrors[uiPixel];
+						}
+
+						// if best CW so far
+						if (fCWError < ptry->m_fError)
+						{
+							ptry->m_uiCW = uiCW;
+							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+							{
+								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
+							}
+							ptry->m_fError = fCWError;
+						}
+
+					}
+					// track the best candidate seen so far across all base colors
+					if (ptry->m_fError < fBestTryError)
+					{
+						a_phalf->m_ptryBest = ptry;
+						fBestTryError = ptry->m_fError;
+					}
+
+					assert(ptry->m_fError < FLT_MAX);
+
+					a_phalf->m_uiTrys++;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 1 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates1(void)
+	{
+		// radius-1 differential tries on the most likely flip, one gray offset at +/-2 and the other at 0
+		static const int s_aaiGrayOffsets[4][2] = { { -2, 0 }, { 2, 0 }, { 0, 2 }, { 0, -2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 2 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates2(void)
+	{
+		// same gray offsets as TryDegenerates1, but tried on the opposite of the most likely flip
+		static const int s_aaiGrayOffsets[4][2] = { { -2, 0 }, { 2, 0 }, { 0, 2 }, { 0, -2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 3 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates3(void)
+	{
+		// radius-1 differential tries on the most likely flip, both gray offsets at +/-2
+		static const int s_aaiGrayOffsets[4][2] = { { -2, -2 }, { -2, 2 }, { 2, -2 }, { 2, 2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 4 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_ETC1::TryDegenerates4(void)
+	{
+		// radius-1 differential tries on the most likely flip, one gray offset at +/-4 and the other at 0
+		static const int s_aaiGrayOffsets[4][2] = { { -4, 0 }, { 4, 0 }, { 0, 4 }, { 0, -4 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best selector for each pixel based on a particular basecolor and CW that have been previously set
+	// calculate the selectors for each half of the block separately
+	// set the block error as the sum of each half's error
+	//
+	void Block4x4Encoding_ETC1::CalculateSelectors()
+	{
+		// a flipped block is split into top/bottom halves, an unflipped block into left/right halves
+		const unsigned int *pauiMappingHalf1 = m_boolFlip ? s_auiTopPixelMapping
+																: s_auiLeftPixelMapping;
+		const unsigned int *pauiMappingHalf2 = m_boolFlip ? s_auiBottomPixelMapping
+																: s_auiRightPixelMapping;
+
+		// each call picks the best CW and selectors for one half and stores that half's error
+		CalculateHalfOfTheSelectors(0, pauiMappingHalf1);
+		CalculateHalfOfTheSelectors(1, pauiMappingHalf2);
+
+		// the block error is the sum of the two half errors
+		m_fError = m_fError1 + m_fError2;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// choose best selectors for half of the block
+	// calculate the error for half of the block
+	//
+	void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
+		const unsigned int *pauiPixelMapping)
+	{
+		// a_uiHalf picks which half's members are used (0: color1/CW1/error1, else color2/CW2/error2);
+		// pauiPixelMapping gives the block pixel index of each of the PIXELS / 2 pixels in the half
+		static const bool DEBUG_PRINT = false;
+		static const unsigned int HALF_PIXELS = PIXELS / 2;
+
+		ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1;
+		unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1;
+		float *pfHalfError = a_uiHalf ? &m_fError2 : &m_fError1;
+		*pfHalfError = FLT_MAX;
+
+		// try each CW
+		for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+		{
+			if (DEBUG_PRINT)
+			{
+				printf("\ncw=%u\n", uiCW);
+			}
+
+			// the decoded color depends only on (base color, CW, selector), so compute it once per
+			// CW instead of once per pixel (same approach as TryIndividualHalf)
+			ColorFloatRGBA afrgbaSelectors[SELECTORS];
+			for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+			{
+				afrgbaSelectors[uiSelector] = (*pfrgbaColor + s_aafCwTable[uiCW][uiSelector]).ClampRGB();
+			}
+
+			unsigned int auiPixelSelectors[HALF_PIXELS];
+			ColorFloatRGBA	afrgbaDecodedPixels[HALF_PIXELS];
+			float afPixelErrors[HALF_PIXELS];
+
+			for (unsigned int uiPixel = 0; uiPixel < HALF_PIXELS; uiPixel++)
+			{
+				const unsigned int uiBlockPixel = pauiPixelMapping[uiPixel];
+				ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[uiBlockPixel];
+				afPixelErrors[uiPixel] = FLT_MAX;
+				if (DEBUG_PRINT)
+				{
+					printf("\tsource [%.2f,%.2f,%.2f]\n", pfrgbaSourcePixel->fR,
+						pfrgbaSourcePixel->fG, pfrgbaSourcePixel->fB);
+				}
+				for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+				{
+					float fPixelError = CalcPixelError(afrgbaSelectors[uiSelector],
+														m_afDecodedAlphas[uiBlockPixel], *pfrgbaSourcePixel);
+
+					if (DEBUG_PRINT)
+					{
+						printf("\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f", uiPixel, uiSelector,
+							afrgbaSelectors[uiSelector].fR,
+							afrgbaSelectors[uiSelector].fG,
+							afrgbaSelectors[uiSelector].fB,
+							fPixelError);
+					}
+
+					if (fPixelError < afPixelErrors[uiPixel])
+					{
+						if (DEBUG_PRINT)
+						{
+							printf(" *");
+						}
+						auiPixelSelectors[uiPixel] = uiSelector;
+						afrgbaDecodedPixels[uiPixel] = afrgbaSelectors[uiSelector];
+						afPixelErrors[uiPixel] = fPixelError;
+					}
+					if (DEBUG_PRINT)
+					{
+						printf("\n");
+					}
+				}
+			}
+
+			// add up all pixel errors
+			float fCWError = 0.0f;
+			for (unsigned int uiPixel = 0; uiPixel < HALF_PIXELS; uiPixel++)
+			{
+				fCWError += afPixelErrors[uiPixel];
+			}
+			if (DEBUG_PRINT)
+			{
+				printf("\terror %.2f\n", fCWError);
+			}
+
+			// if best CW so far
+			if (fCWError < *pfHalfError)
+			{
+				*pfHalfError = fCWError;
+				*puiCW = uiCW;
+				for (unsigned int uiPixel = 0; uiPixel < HALF_PIXELS; uiPixel++)
+				{
+					m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel];
+					m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel];
+				}
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_ETC1::SetEncodingBits(void)
+	{
+		assert(m_mode == MODE_ETC1);
+
+		Block4x4EncodingBits_RGB8 *pbits = m_pencodingbitsRGB8;
+		if (!m_boolDiff)
+		{
+			// individual mode: both base colors are stored directly, quantized with a scale of 15
+			pbits->individual.red1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+			pbits->individual.green1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+			pbits->individual.blue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+			pbits->individual.red2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+			pbits->individual.green2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+			pbits->individual.blue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+		}
+		else
+		{
+			// differential mode: color 1 quantized with a scale of 31, color 2 as a delta from color 1
+			const int iRed1 = m_frgbaColor1.IntRed(31.0f);
+			const int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
+			const int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
+			const int iRed2 = m_frgbaColor2.IntRed(31.0f);
+			const int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
+			const int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
+
+			const int iDRed2 = iRed2 - iRed1;
+			const int iDGreen2 = iGreen2 - iGreen1;
+			const int iDBlue2 = iBlue2 - iBlue1;
+
+			assert(iDRed2 >= -4 && iDRed2 < 4);
+			assert(iDGreen2 >= -4 && iDGreen2 < 4);
+			assert(iDBlue2 >= -4 && iDBlue2 < 4);
+
+			pbits->differential.red1 = (unsigned int)iRed1;
+			pbits->differential.green1 = (unsigned int)iGreen1;
+			pbits->differential.blue1 = (unsigned int)iBlue1;
+			pbits->differential.dred2 = iDRed2;
+			pbits->differential.dgreen2 = iDGreen2;
+			pbits->differential.dblue2 = iDBlue2;
+		}
+
+		// the remaining fields are written through the individual view in both modes
+		pbits->individual.cw1 = m_uiCW1;
+		pbits->individual.cw2 = m_uiCW2;
+
+		SetEncodingBits_Selectors();
+
+		pbits->individual.diff = (unsigned int)m_boolDiff;
+		pbits->individual.flip = (unsigned int)m_boolFlip;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the selectors in the encoding bits
+	//
+	void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void)
+	{
+		// each selector is split across two bit planes: its msb goes to bit position (pixel ^ 8)
+		// and its lsb to bit position ((16 + pixel) ^ 8); the XOR with 8 swaps the two groups of
+		// eight pixels within each plane
+		unsigned int uiSelectorBits = 0;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiMsb = m_auiSelectors[uiPixel] >> 1;
+			const unsigned int uiLsb = m_auiSelectors[uiPixel] & 1;
+
+			uiSelectorBits |= (uiMsb << (uiPixel ^ 8)) | (uiLsb << ((16 + uiPixel) ^ 8));
+		}
+		m_pencodingbitsRGB8->individual.selectors = uiSelectorBits;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors and decoded alpha based on the encoding state
+	//
+	void Block4x4Encoding_ETC1::Decode(void)
+	{
+		// the pixel order table lists the 8 pixels of half 1 first, then the 8 pixels of half 2
+		const unsigned int *pauiPixelOrder = s_auiPixelOrderFlip0;
+		if (m_boolFlip)
+		{
+			pauiPixelOrder = s_auiPixelOrderFlip1;
+		}
+		for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++)
+		{
+			const bool boolFirstHalf = (uiPixelOrder < 8);
+			ColorFloatRGBA &frgbaBase = boolFirstHalf ? m_frgbaColor1 : m_frgbaColor2;
+			const unsigned int uiPixel = pauiPixelOrder[uiPixelOrder];
+			const float *pafDeltas = s_aafCwTable[boolFirstHalf ? m_uiCW1 : m_uiCW2];
+			m_afrgbaDecodedColors[uiPixel] = (frgbaBase + pafDeltas[m_auiSelectors[uiPixel]]).ClampRGB();
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h
new file mode 100644
index 0000000..816aa05
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_ETC1.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcDifferentialTrys.h"
+#include "EtcIndividualTrys.h"
+
+namespace Etc
+{
+
+	// base class for Block4x4Encoding_RGB8
+	class Block4x4Encoding_ETC1 : public Block4x4Encoding
+	{
+	public:
+
+		Block4x4Encoding_ETC1(void);
+		virtual ~Block4x4Encoding_ETC1(void);
+
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric);
+
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+
+											ErrorMetric a_errormetric);
+
+		virtual void PerformIteration(float a_fEffort);
+		// true when the block is split into top/bottom halves, false for left/right (see CalculateSelectors)
+		inline virtual bool GetFlip(void)
+		{
+			return m_boolFlip;
+		}
+		// true when SetEncodingBits writes the differential layout, false for the individual layout
+		inline virtual bool IsDifferential(void)
+		{
+			return m_boolDiff;
+		}
+		// write colors, CWs, selectors and the diff/flip flags into m_pencodingbitsRGB8
+		virtual void SetEncodingBits(void);
+		// recompute m_afrgbaDecodedColors from the intermediate encoding; sets every decoded alpha to 1.0
+		void Decode(void);
+
+		inline ColorFloatRGBA GetColor1(void) const
+		{
+			return m_frgbaColor1;
+		}
+
+		inline ColorFloatRGBA GetColor2(void) const
+		{
+			return m_frgbaColor2;
+		}
+
+		inline const unsigned int * GetSelectors(void) const
+		{
+			return m_auiSelectors;
+		}
+
+		inline unsigned int GetCW1(void) const
+		{
+			return m_uiCW1;
+		}
+
+		inline unsigned int GetCW2(void) const
+		{
+			return m_uiCW2;
+		}
+
+		inline bool HasSeverelyBentDifferentialColors(void) const
+		{
+			return m_boolSeverelyBentDifferentialColors;
+		}
+
+	protected:
+
+		static const unsigned int s_auiPixelOrderFlip0[PIXELS];
+		static const unsigned int s_auiPixelOrderFlip1[PIXELS];
+		static const unsigned int s_auiPixelOrderHScan[PIXELS];
+		// block pixel indices of the 8 pixels in each half: left/right when not flipped, top/bottom when flipped
+		static const unsigned int s_auiLeftPixelMapping[8];
+		static const unsigned int s_auiRightPixelMapping[8];
+		static const unsigned int s_auiTopPixelMapping[8];
+		static const unsigned int s_auiBottomPixelMapping[8];
+
+		static const unsigned int SELECTOR_BITS = 2;
+		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;	// number of selector values per pixel
+
+		static const unsigned int CW_BITS = 3;
+		static const unsigned int CW_RANGES = 1 << CW_BITS;	// number of rows in s_aafCwTable
+
+		static float s_aafCwTable[CW_RANGES][SELECTORS];	// [CW][selector] offset added to a half's base color
+		static unsigned char s_aucDifferentialCwRange[256];
+
+		static const int MAX_DIFFERENTIAL = 3;
+		static const int MIN_DIFFERENTIAL = -4;
+
+		void InitFromEncodingBits_Selectors(void);
+
+		void PerformFirstIteration(void);
+		void CalculateMostLikelyFlip(void);
+
+		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
+								int a_iGrayOffset1, int a_iGrayOffset2);
+		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
+
+		void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius);
+		void TryIndividualHalf(IndividualTrys::Half *a_phalf);
+
+		void TryDegenerates1(void);
+		void TryDegenerates2(void);
+		void TryDegenerates3(void);
+		void TryDegenerates4(void);
+
+		void CalculateSelectors();
+		void CalculateHalfOfTheSelectors(unsigned int a_uiHalf,
+											const unsigned int *pauiPixelMapping);
+
+		// calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line
+		inline float CalcGrayDistance2(ColorFloatRGBA &r_frgbaPixel,
+										ColorFloatRGBA &r_frgbaTarget)
+		{
+			float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) +
+								(r_frgbaPixel.fG - r_frgbaTarget.fG) +
+								(r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f;
+
+			ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB();
+
+			float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR;
+			float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG;
+			float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB;
+
+			return (fDR*fDR) + (fDG*fDG) + (fDB*fDB);
+		}
+
+		void SetEncodingBits_Selectors(void);
+
+		// intermediate encoding
+		bool			m_boolDiff;	// differential (true) or individual (false) color layout
+		bool			m_boolFlip;	// top/bottom (true) or left/right (false) halves
+		ColorFloatRGBA	m_frgbaColor1;	// base color of half 1
+		ColorFloatRGBA	m_frgbaColor2;	// base color of half 2
+		unsigned int	m_uiCW1;	// s_aafCwTable row used by half 1
+		unsigned int	m_uiCW2;	// s_aafCwTable row used by half 2
+		unsigned int	m_auiSelectors[PIXELS];	// per-pixel column index into the half's s_aafCwTable row
+
+		// state shared between iterations
+		ColorFloatRGBA	m_frgbaSourceAverageLeft;
+		ColorFloatRGBA	m_frgbaSourceAverageRight;
+		ColorFloatRGBA	m_frgbaSourceAverageTop;
+		ColorFloatRGBA	m_frgbaSourceAverageBottom;
+		bool			m_boolMostLikelyFlip;
+
+		// stats
+		float			m_fError1;	// error for Etc1 half 1
+		float			m_fError2;	// error for Etc1 half 2
+		bool			m_boolSeverelyBentDifferentialColors;	// only valid if m_boolDiff;
+
+		// final encoding
+		Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8;		// or RGB8 portion of Block4x4EncodingBits_RGB8A8
+
+		private:
+
+		void CalculateSourceAverages(void);
+
+	};
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp
new file mode 100644
index 0000000..5dd9884
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.cpp
@@ -0,0 +1,429 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_R11.cpp
+
+Block4x4Encoding_R11 is the encoder to use when targeting file format R11 and SR11 (signed R11).
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_R11.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+
+	// modifier values to use for R11, SR11, RG11 and SRG11
+	float Block4x4Encoding_R11::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS]
+	{	// rows are table indices (0..3 here); columns are selectors 0..7 (0..3 negative, 4..7 non-negative), in units of 1/255
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
+		// table indices 4..7
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
+		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		// table indices 8..11
+		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		// table indices 12..15
+		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
+		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_R11::Block4x4Encoding_R11(void)
+	{
+		// no encoding bits are attached until InitFromSource() or InitFromEncodingBits() runs
+		m_pencodingbitsR11 = nullptr;
+
+	}
+
+	Block4x4Encoding_R11::~Block4x4Encoding_R11(void) {}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_R11::InitFromSource(Block4x4 *a_pblockParent,
+		ColorFloatRGBA *a_pafrgbaSource,
+		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		// base-class setup first, then view the caller's output buffer as R11 encoding bits
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		m_pencodingbitsR11 = reinterpret_cast<Block4x4EncodingBits_R11 *>(a_paucEncodingBits);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_R11::InitFromEncodingBits(Block4x4 *a_pblockParent,
+		unsigned char *a_paucEncodingBits,
+		ColorFloatRGBA *a_pafrgbaSource,
+		ErrorMetric a_errormetric)
+	{
+		m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)a_paucEncodingBits;
+
+		// init RGB portion
+		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
+			(unsigned char *)m_pencodingbitsR11,
+			a_pafrgbaSource,
+			a_errormetric);
+
+		// the image format is the same for every pixel, so classify it once up front instead of
+		// re-querying the parent block several times per pixel inside the decode loop
+		const auto format = a_pblockParent->GetImageSource()->GetFormat();
+		const bool boolUnsignedFormat = (format == Image::Format::R11 || format == Image::Format::RG11);
+		const bool boolSignedFormat = (format == Image::Format::SIGNED_R11 || format == Image::Format::SIGNED_RG11);
+		// only the four formats above can be decoded here
+		assert(boolUnsignedFormat || boolSignedFormat);
+
+		// init R11 portion
+		m_mode = MODE_R11;
+		// the stored base byte is read as signed for the signed formats, as unsigned otherwise
+		if (boolSignedFormat)
+		{
+			m_fRedBase = (float)(signed char)m_pencodingbitsR11->data.base;
+		}
+		else
+		{
+			m_fRedBase = (float)(unsigned char)m_pencodingbitsR11->data.base;
+		}
+		m_fRedMultiplier = (float)m_pencodingbitsR11->data.multiplier;
+		m_uiRedModifierTableIndex = m_pencodingbitsR11->data.table;
+
+		// reassemble the selector bits from the six selector fields, selectors0 being most significant
+		unsigned long long int ulliSelectorBits = 0;
+		ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors0 << 40;
+		ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors1 << 32;
+		ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors2 << 24;
+		ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors3 << 16;
+		ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors4 << 8;
+		ulliSelectorBits |= (unsigned long long int)m_pencodingbitsR11->data.selectors5;
+
+		// pixel 0 starts at bit 45; each following pixel sits 3 bits lower
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiShift = 45 - (3 * uiPixel);
+			m_auiRedSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (SELECTORS - 1);
+		}
+
+		// decode the red channel
+		// signed bases are offset by +128 before being handed to DecodePixelRed
+		const float fDecodeBase = boolSignedFormat ? (m_fRedBase + 128) : m_fRedBase;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			float fDecodedPixelData = 0.0f;
+			if (boolUnsignedFormat || boolSignedFormat)
+			{
+				fDecodedPixelData = DecodePixelRed(fDecodeBase, m_fRedMultiplier,
+					m_uiRedModifierTableIndex, m_auiRedSelectors[uiPixel]);
+			}
+			m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fDecodedPixelData, 0.0f, 0.0f, 1.0f);
+		}
+		// calc red error
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_R11::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+		m_mode = MODE_R11;
+
+		// every valid iteration runs one CalculateR11() search and then publishes m_fRedBlockError
+		// as the block error; only the search parameters and the stop conditions differ
+		bool boolSearched = true;
+
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			// start from artificially high errors
+			m_fError = FLT_MAX;
+			m_fRedBlockError = FLT_MAX;
+			CalculateR11(8, 0.0f, 0.0f);
+			break;
+		case 1:
+			CalculateR11(8, 2.0f, 1.0f);
+			if (a_fEffort <= 24.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+		case 2:
+			CalculateR11(8, 12.0f, 1.0f);
+			if (a_fEffort <= 49.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+		case 3:
+			CalculateR11(7, 6.0f, 1.0f);
+			break;
+		case 4:
+			CalculateR11(6, 3.0f, 1.0f);
+			break;
+		case 5:
+			// last iteration: always finishes the encoding
+			CalculateR11(5, 1.0f, 0.0f);
+			m_boolDone = true;
+			break;
+		default:
+			assert(0);
+			boolSearched = false;
+			break;
+		}
+
+		if (boolSearched)
+		{
+			m_fError = m_fRedBlockError;
+		}
+
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best combination of base color, multiplier and selectors
+	//
+	// a_uiSelectorsUsed limits the number of selector combinations to try
+	// a_fBaseRadius limits the range of base colors to try
+	// a_fMultiplierRadius limits the range of multipliers to try
+	//
+	void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed,
+												float a_fBaseRadius, float a_fMultiplierRadius)
+	{
+		// maps from virtual (monotonic) selector to ETC selector
+		static const unsigned int auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7};
+
+		// find min/max red
+		float fMinRed = 1.0f;
+		float fMaxRed = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// ignore border pixels (marked by a NaN source alpha)
+			float fAlpha = m_pafrgbaSource[uiPixel].fA;
+			if (isnan(fAlpha))
+			{
+				continue;
+			}
+
+			float fRed = m_pafrgbaSource[uiPixel].fR;
+
+			if (fRed < fMinRed)
+			{
+				fMinRed = fRed;
+			}
+			if (fRed > fMaxRed)
+			{
+				fMaxRed = fRed;
+			}
+		}
+		assert(fMinRed <= fMaxRed);
+		// NOTE(review): if every pixel is skipped above, fMinRed stays 1.0 and fMaxRed 0.0, so the assert fires
+		float fRedRange = (fMaxRed - fMinRed);
+
+		// try each modifier table entry
+		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
+		{
+			for (unsigned int uiMinVirtualSelector = 0;
+					uiMinVirtualSelector <= (8- a_uiSelectorsUsed);
+					uiMinVirtualSelector++)
+			{
+				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
+				// convert the ends of the virtual selector window to ETC selector indices
+				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
+				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
+				// negated modifier of the window's lowest selector
+				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];
+				// modifier span covered by the selector window
+				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
+											s_aafModifierTable[uiTableEntry][uiMinSelector];
+				// ratio of the negated lowest modifier to the window's span
+				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
+				// put the center at the same fraction of the source red range, rounded to a multiple of 1/255
+				float fCenter = fMinRed + fCenterRatio*fRedRange;
+				fCenter = roundf(255.0f * fCenter) / 255.0f;
+				// base search interval: center +/- a_fBaseRadius (in 1/255 units), clamped to [0, 1]
+				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);
+				if (fMinBase < 0.0f)
+				{
+					fMinBase = 0.0f;
+				}
+
+				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
+				if (fMaxBase > 1.0f)
+				{
+					fMaxBase = 1.0f;
+				}
+				// step through candidate bases in increments of just under 1/255
+				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
+				{
+					float fRangeMultiplier = roundf(fRedRange / fTableEntryRange);	// does not depend on fBase
+					// NOTE(review): a lower bound below 1 becomes 0 here, while the upper bound below is raised to 1 - confirm intended
+					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
+					if (fMinMultiplier < 1.0f)
+					{
+						fMinMultiplier = 0.0f;
+					}
+					else if (fMinMultiplier > 15.0f)
+					{
+						fMinMultiplier = 15.0f;
+					}
+
+					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
+					if (fMaxMultiplier < 1.0f)
+					{
+						fMaxMultiplier = 1.0f;
+					}
+					else if (fMaxMultiplier > 15.0f)
+					{
+						fMaxMultiplier = 15.0f;
+					}
+
+					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
+					{
+						// find best selector for each pixel
+						unsigned int auiBestSelectors[PIXELS];
+						float afBestRedError[PIXELS];
+						float afBestPixelRed[PIXELS];
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							float fBestPixelRedError = FLT_MAX;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								float fPixelRed = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
+
+								ColorFloatRGBA frgba(fPixelRed, m_pafrgbaSource[uiPixel].fG,0.0f,1.0f);
+
+								float fPixelRedError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
+
+								if (fPixelRedError < fBestPixelRedError)
+								{
+									fBestPixelRedError = fPixelRedError;
+									auiBestSelectors[uiPixel] = uiSelector;
+									afBestRedError[uiPixel] = fBestPixelRedError;
+									afBestPixelRed[uiPixel] = fPixelRed;
+								}
+							}
+						}
+						float fBlockError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							fBlockError += afBestRedError[uiPixel];
+						}
+						if (fBlockError < m_fRedBlockError)
+						{
+							m_fRedBlockError = fBlockError;
+							// m_fRedBase is kept in 0..255 units; signed formats store it offset by -128
+							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
+							{
+								m_fRedBase = 255.0f * fBase;
+							}
+							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_R11 || m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
+							{
+								m_fRedBase = (fBase * 255) - 128;
+							}
+							else
+							{
+								assert(0);
+							}
+							m_fRedMultiplier = fMultiplier;
+							m_uiRedModifierTableIndex = uiTableEntry;
+							// adopt this candidate's selectors and decoded red values
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiRedSelectors[uiPixel] = auiBestSelectors[uiPixel];
+								float fBestPixelRed = afBestPixelRed[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(fBestPixelRed, 0.0f, 0.0f, 1.0f);
+								m_afDecodedAlphas[uiPixel] = 1.0f;
+							}
+						}
+					}
+				}
+
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_R11::SetEncodingBits(void)
+	{
+		// signed formats store the rounded base as a signed byte, unsigned formats as an unsigned byte
+		const auto format = m_pblockParent->GetImageSource()->GetFormat();
+		if (format == Image::Format::SIGNED_R11 || format == Image::Format::SIGNED_RG11)
+		{
+			m_pencodingbitsR11->data.base = (signed char)roundf(m_fRedBase);
+		}
+		else if (format == Image::Format::R11 || format == Image::Format::RG11)
+		{
+			m_pencodingbitsR11->data.base = (unsigned char)roundf(m_fRedBase);
+		}
+		else
+		{
+			assert(0);
+		}
+		m_pencodingbitsR11->data.table = m_uiRedModifierTableIndex;
+		m_pencodingbitsR11->data.multiplier = (unsigned char)roundf(m_fRedMultiplier);
+		// pack the selectors 3 bits apart, pixel 0 ending up highest (shifted left by 45)
+		unsigned long long int ulliPacked = 0;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			ulliPacked = (ulliPacked << 3) | m_auiRedSelectors[uiPixel];
+		}
+		m_pencodingbitsR11->data.selectors0 = ulliPacked >> 40;
+		m_pencodingbitsR11->data.selectors1 = ulliPacked >> 32;
+		m_pencodingbitsR11->data.selectors2 = ulliPacked >> 24;
+		m_pencodingbitsR11->data.selectors3 = ulliPacked >> 16;
+		m_pencodingbitsR11->data.selectors4 = ulliPacked >> 8;
+		m_pencodingbitsR11->data.selectors5 = ulliPacked;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h
new file mode 100644
index 0000000..99cb4df
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_R11.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits_R11;
+
+	// ################################################################################
+	// Block4x4Encoding_R11
+	// ################################################################################
+
+	class Block4x4Encoding_R11 : public Block4x4Encoding_RGB8
+	{
+	public:
+
+		Block4x4Encoding_R11(void);
+		virtual ~Block4x4Encoding_R11(void);
+
+		// initialization prior to encoding
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric);
+
+		// initialization from the encoding bits of a previous encoding
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+											ErrorMetric a_errormetric);
+
+		// perform a single encoding iteration
+		virtual void PerformIteration(float a_fEffort);
+
+		// set the encoding bits based on encoding state
+		virtual void SetEncodingBits(void);
+
+		// read-only accessors for the red channel encoding state
+
+		inline float GetRedBase(void) const { return m_fRedBase; }
+
+		inline float GetRedMultiplier(void) const { return m_fRedMultiplier; }
+
+		inline int GetRedTableIndex(void) const { return m_uiRedModifierTableIndex; }
+
+		inline const unsigned int * GetRedSelectors(void) const { return m_auiRedSelectors; }
+
+	protected:
+
+		// dimensions of the modifier table: 16 rows of (1 << 3) selector entries
+		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
+		static const unsigned int SELECTOR_BITS = 3;
+		static const unsigned int SELECTORS = 1 << SELECTOR_BITS;
+
+		// modifier values, indexed as [table index][selector]
+		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][SELECTORS];
+
+		void CalculateR11(unsigned int a_uiSelectorsUsed,
+							float a_fBaseRadius, float a_fMultiplierRadius);
+
+		// decode one channel value and return it clamped to [0, 1]
+		// (not red specific: Block4x4Encoding_RG11 reuses it for green)
+		//
+		// a_fBase is the base value; callers decoding signed formats add 128 first
+		// a_fMultiplier scales the modifier; values <= 0 are replaced by 1/8
+		// a_uiTableIndex selects the row of s_aafModifierTable
+		// a_uiSelector selects the entry within that row
+		//
+		inline float DecodePixelRed(float a_fBase, float a_fMultiplier,
+			unsigned int a_uiTableIndex, unsigned int a_uiSelector)
+		{
+			const float fScale = (a_fMultiplier <= 0.0f) ? (1.0f / 8.0f) : a_fMultiplier;
+
+			float fRed = a_fBase * 8 + 4 +
+				8 * fScale*s_aafModifierTable[a_uiTableIndex][a_uiSelector]*255;
+			fRed /= 2047.0f;
+
+			// clamp to the normalized range
+			if (fRed < 0.0f)
+			{
+				return 0.0f;
+			}
+			if (fRed > 1.0f)
+			{
+				return 1.0f;
+			}
+			return fRed;
+		}
+
+		// encoding bits written by SetEncodingBits()
+		Block4x4EncodingBits_R11 *m_pencodingbitsR11;
+
+		// red channel encoding state
+		float m_fRedBase;
+		float m_fRedMultiplier;
+		float m_fRedBlockError;
+		unsigned int m_uiRedModifierTableIndex;
+		unsigned int m_auiRedSelectors[PIXELS];
+
+
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp
new file mode 100644
index 0000000..a1bc9c9
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.cpp
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RG11.cpp
+
+Block4x4Encoding_RG11 is the encoder to use when targeting file format RG11 and SRG11 (signed RG11).
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RG11.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RG11::Block4x4Encoding_RG11(void)
+		: m_pencodingbitsRG11(nullptr)		// no encoding bits until an Init call supplies them
+	{
+	}
+
+	Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_RG11::InitFromSource(Block4x4 *a_pblockParent,
+		ColorFloatRGBA *a_pafrgbaSource,
+		unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		// remember where the final encoding bits go, then run the shared base setup
+		m_pencodingbitsRG11 = reinterpret_cast<Block4x4EncodingBits_RG11 *>(a_paucEncodingBits);
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RG11::InitFromEncodingBits(Block4x4 *a_pblockParent,
+		unsigned char *a_paucEncodingBits,
+		ColorFloatRGBA *a_pafrgbaSource,
+		ErrorMetric a_errormetric)
+	{
+		typedef unsigned long long int SelectorBits;
+
+		m_pencodingbitsRG11 = reinterpret_cast<Block4x4EncodingBits_RG11 *>(a_paucEncodingBits);
+
+		// run the RGB8 initialization on the same bits first
+		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent, a_paucEncodingBits,
+													a_pafrgbaSource, a_errormetric);
+		m_fError = 0.0f;
+		m_mode = MODE_RG11;
+
+		const auto format = a_pblockParent->GetImageSource()->GetFormat();
+		const bool boolSigned = (format == Image::Format::SIGNED_RG11);
+
+		// the stored base is read as a signed char for SIGNED_RG11, unsigned otherwise
+		if (boolSigned)
+		{
+			m_fRedBase = (float)(signed char)m_pencodingbitsRG11->data.baseR;
+			m_fGrnBase = (float)(signed char)m_pencodingbitsRG11->data.baseG;
+		}
+		else
+		{
+			m_fRedBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseR;
+			m_fGrnBase = (float)(unsigned char)m_pencodingbitsRG11->data.baseG;
+		}
+
+		// multipliers and modifier table indices are taken over unchanged
+		m_uiRedModifierTableIndex = m_pencodingbitsRG11->data.tableIndexR;
+		m_uiGrnModifierTableIndex = m_pencodingbitsRG11->data.tableIndexG;
+		m_fRedMultiplier = (float)m_pencodingbitsRG11->data.multiplierR;
+		m_fGrnMultiplier = (float)m_pencodingbitsRG11->data.multiplierG;
+
+		// join the six selector fields of each channel into a single value
+		const SelectorBits ulliRedBits =
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsR0 << 40) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsR1 << 32) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsR2 << 24) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsR3 << 16) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsR4 << 8) |
+			(SelectorBits)m_pencodingbitsRG11->data.selectorsR5;
+		const SelectorBits ulliGrnBits =
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsG0 << 40) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsG1 << 32) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsG2 << 24) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsG3 << 16) |
+			((SelectorBits)m_pencodingbitsRG11->data.selectorsG4 << 8) |
+			(SelectorBits)m_pencodingbitsRG11->data.selectorsG5;
+
+		// extract the selectors and decode both channels pixel by pixel
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++)
+		{
+			// selectors are masked with SELECTORS - 1; pixel 0 sits at bit 45
+			const unsigned int uiShift = 45 - (3 * uiIndex);
+			m_auiRedSelectors[uiIndex] = (ulliRedBits >> uiShift) & (SELECTORS - 1);
+			m_auiGrnSelectors[uiIndex] = (ulliGrnBits >> uiShift) & (SELECTORS - 1);
+
+			float fRed = 0.0f;
+			float fGrn = 0.0f;
+			if (format == Image::Format::RG11)
+			{
+				fRed = DecodePixelRed(m_fRedBase, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiIndex]);
+				fGrn = DecodePixelRed(m_fGrnBase, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiIndex]);
+			}
+			else if (boolSigned)
+			{
+				// signed bases are shifted by 128 before decoding
+				fRed = DecodePixelRed(m_fRedBase + 128, m_fRedMultiplier, m_uiRedModifierTableIndex, m_auiRedSelectors[uiIndex]);
+				fGrn = DecodePixelRed(m_fGrnBase + 128, m_fGrnMultiplier, m_uiGrnModifierTableIndex, m_auiGrnSelectors[uiIndex]);
+			}
+			else
+			{
+				assert(0);
+			}
+			m_afrgbaDecodedColors[uiIndex] = ColorFloatRGBA(fRed, fGrn, 0.0f, 1.0f);
+		}
+
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RG11::PerformIteration(float a_fEffort)
+	{
+		// search parameters of each successive pass, in the order they are
+		// handed to CalculateR11() and CalculateG11()
+		struct Pass
+		{
+			unsigned int uiSelectorsUsed;
+			float fBaseRadius;
+			float fMultiplierRadius;
+		};
+
+		static const Pass s_aPasses[] =
+		{
+			{ 8, 0.0f, 0.0f },
+			{ 8, 2.0f, 1.0f },
+			{ 8, 12.0f, 1.0f },
+			{ 7, 6.0f, 1.0f },
+			{ 6, 3.0f, 1.0f },
+			{ 5, 1.0f, 0.0f }
+		};
+
+		static const unsigned int PASSES = sizeof(s_aPasses) / sizeof(s_aPasses[0]);
+
+		assert(!m_boolDone);
+
+		// m_uiEncodingIterations selects the pass to run
+		const unsigned int uiPass = m_uiEncodingIterations;
+		if (uiPass >= PASSES)
+		{
+			// no further passes are defined
+			assert(0);
+		}
+		else
+		{
+			// the first pass starts from artificially high error values
+			if (uiPass == 0)
+			{
+				m_fError = FLT_MAX;
+				m_fGrnBlockError = FLT_MAX;
+				m_fRedBlockError = FLT_MAX;
+			}
+
+			// refine red and green separately, then add up their errors
+			const Pass &pass = s_aPasses[uiPass];
+			CalculateR11(pass.uiSelectorsUsed, pass.fBaseRadius, pass.fMultiplierRadius);
+			CalculateG11(pass.uiSelectorsUsed, pass.fBaseRadius, pass.fMultiplierRadius);
+			m_fError = (m_fGrnBlockError + m_fRedBlockError);
+
+			// low effort settings stop after pass 1 (effort <= 24.5) or pass 2
+			// (effort <= 49.5); the last pass always stops
+			const bool boolEffortReached = (uiPass == 1 && a_fEffort <= 24.5f) ||
+											(uiPass == 2 && a_fEffort <= 49.5f);
+			if (boolEffortReached || uiPass == PASSES - 1)
+			{
+				m_boolDone = true;
+			}
+		}
+
+		// a perfect encoding also ends the search
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best combination of base color, multiplier and selectors
+	//
+	// a_uiSelectorsUsed limits the number of selector combinations to try
+	// a_fBaseRadius limits the range of base colors to try
+	// a_fMultiplierRadius limits the range of multipliers to try
+	//
+	void Block4x4Encoding_RG11::CalculateG11(unsigned int a_uiSelectorsUsed,
+		float a_fBaseRadius, float a_fMultiplierRadius)
+	{
+		// maps from virtual (monotonic) selector to etc selector
+		static const unsigned int auiVirtualSelectorMap[8] = { 3, 2, 1, 0, 4, 5, 6, 7 };
+
+		// find the min/max green value over the source pixels of this block
+		float fMinGrn = 1.0f;
+		float fMaxGrn = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// ignore border pixels (they are marked by a NaN alpha)
+			float fAlpha = m_pafrgbaSource[uiPixel].fA;
+			if (isnan(fAlpha))
+			{
+				continue;
+			}
+
+			float fGrn = m_pafrgbaSource[uiPixel].fG;
+
+			if (fGrn < fMinGrn)
+			{
+				fMinGrn = fGrn;
+			}
+			if (fGrn > fMaxGrn)
+			{
+				fMaxGrn = fGrn;
+			}
+		}
+		assert(fMinGrn <= fMaxGrn);	// fails if every pixel was skipped above
+
+		float fGrnRange = (fMaxGrn - fMinGrn);
+
+		// try each modifier table entry
+		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
+		{
+			for (unsigned int uiMinVirtualSelector = 0;	// slide a window of a_uiSelectorsUsed virtual selectors
+			uiMinVirtualSelector <= (8 - a_uiSelectorsUsed);
+				uiMinVirtualSelector++)
+			{
+				unsigned int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1;
+
+				unsigned int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector];
+				unsigned int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector];
+
+				float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][uiMinSelector];	// negated modifier of the window's first selector
+
+				float fTableEntryRange = s_aafModifierTable[uiTableEntry][uiMaxSelector] -
+					s_aafModifierTable[uiTableEntry][uiMinSelector];
+
+				float fCenterRatio = fTableEntryCenter / fTableEntryRange;
+
+				float fCenter = fMinGrn + fCenterRatio*fGrnRange;
+				fCenter = roundf(255.0f * fCenter) / 255.0f;	// snap to a multiple of 1/255
+
+				float fMinBase = fCenter - (a_fBaseRadius / 255.0f);	// base search range, clamped to [0, 1]
+				if (fMinBase < 0.0f)
+				{
+					fMinBase = 0.0f;
+				}
+
+				float fMaxBase = fCenter + (a_fBaseRadius / 255.0f);
+				if (fMaxBase > 1.0f)
+				{
+					fMaxBase = 1.0f;
+				}
+
+				for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))	// step is just under 1/255
+				{
+					float fRangeMultiplier = roundf(fGrnRange / fTableEntryRange);
+
+					float fMinMultiplier = fRangeMultiplier - a_fMultiplierRadius;
+					if (fMinMultiplier < 1.0f)
+					{
+						fMinMultiplier = 0.0f;	// note: 0, not 1; DecodePixelRed() treats a multiplier <= 0 as 1/8
+					}
+					else if (fMinMultiplier > 15.0f)
+					{
+						fMinMultiplier = 15.0f;
+					}
+
+					float fMaxMultiplier = fRangeMultiplier + a_fMultiplierRadius;
+					if (fMaxMultiplier < 1.0f)
+					{
+						fMaxMultiplier = 1.0f;
+					}
+					else if (fMaxMultiplier > 15.0f)
+					{
+						fMaxMultiplier = 15.0f;
+					}
+
+					for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
+					{
+						// find best selector for each pixel
+						unsigned int auiBestSelectors[PIXELS];
+						float afBestGrnError[PIXELS];
+						float afBestPixelGrn[PIXELS];
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							float fBestPixelGrnError = FLT_MAX;
+
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								//DecodePixelRed is not red channel specific
+								float fPixelGrn = DecodePixelRed(fBase * 255.0f, fMultiplier, uiTableEntry, uiSelector);
+								// the candidate reuses the source red and fixes blue at 0 and alpha at 1
+								ColorFloatRGBA frgba(m_pafrgbaSource[uiPixel].fR, fPixelGrn, 0.0f, 1.0f);
+									
+								float fPixelGrnError = CalcPixelError(frgba, 1.0f, m_pafrgbaSource[uiPixel]);
+
+								if (fPixelGrnError < fBestPixelGrnError)
+								{
+									fBestPixelGrnError = fPixelGrnError;
+									auiBestSelectors[uiPixel] = uiSelector;
+									afBestGrnError[uiPixel] = fBestPixelGrnError;
+									afBestPixelGrn[uiPixel] = fPixelGrn;
+								}
+							}
+						}
+						float fBlockError = 0.0f;	// block error is the sum of the best per-pixel errors
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							fBlockError += afBestGrnError[uiPixel];
+						}
+
+						if (fBlockError < m_fGrnBlockError)	// keep this candidate only if it beats the best so far
+						{
+							m_fGrnBlockError = fBlockError;
+
+							if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::RG11)
+							{
+								m_fGrnBase = 255.0f * fBase;
+							}
+							else if (m_pblockParent->GetImageSource()->GetFormat() == Image::Format::SIGNED_RG11)
+							{
+								m_fGrnBase = (fBase * 255) - 128;	// signed format: base is kept offset by -128
+							}
+							else
+							{
+								assert(0);
+							}
+							m_fGrnMultiplier = fMultiplier;
+							m_uiGrnModifierTableIndex = uiTableEntry;
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiGrnSelectors[uiPixel] = auiBestSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel].fG = afBestPixelGrn[uiPixel];	// only the green component is updated here
+								m_afDecodedAlphas[uiPixel] = 1.0f;
+							}
+						}
+					}
+				}
+
+			}
+		}
+	}
+	
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RG11::SetEncodingBits(void)
+	{
+		// the image format decides how the bases are stored
+		const auto format = m_pblockParent->GetImageSource()->GetFormat();
+
+		// pack one selector per pixel and channel, 3 bits apart, starting at bit 45
+		// and working downwards
+		unsigned long long int ulliRedPacked = 0;
+		unsigned long long int ulliGrnPacked = 0;
+		for (unsigned int uiIndex = 0, uiShift = 45; uiIndex < PIXELS; uiIndex++, uiShift -= 3)
+		{
+			ulliRedPacked |= ((unsigned long long int)m_auiRedSelectors[uiIndex]) << uiShift;
+			ulliGrnPacked |= ((unsigned long long int)m_auiGrnSelectors[uiIndex]) << uiShift;
+		}
+
+		// RG11 stores the rounded bases as unsigned chars, SIGNED_RG11 as signed chars
+		if (format == Image::Format::RG11)
+		{
+			m_pencodingbitsRG11->data.baseR = (unsigned char)roundf(m_fRedBase);
+			m_pencodingbitsRG11->data.baseG = (unsigned char)roundf(m_fGrnBase);
+		}
+		else if (format == Image::Format::SIGNED_RG11)
+		{
+			m_pencodingbitsRG11->data.baseR = (signed char)roundf(m_fRedBase);
+			m_pencodingbitsRG11->data.baseG = (signed char)roundf(m_fGrnBase);
+		}
+		else
+		{
+			// any other format is not handled by this encoder
+			assert(0);
+		}
+
+		// red channel: table index, multiplier and the six selector fields
+		m_pencodingbitsRG11->data.tableIndexR = m_uiRedModifierTableIndex;
+		m_pencodingbitsRG11->data.multiplierR = (unsigned char)roundf(m_fRedMultiplier);
+
+		m_pencodingbitsRG11->data.selectorsR0 = ulliRedPacked >> 40;
+		m_pencodingbitsRG11->data.selectorsR1 = ulliRedPacked >> 32;
+		m_pencodingbitsRG11->data.selectorsR2 = ulliRedPacked >> 24;
+		m_pencodingbitsRG11->data.selectorsR3 = ulliRedPacked >> 16;
+		m_pencodingbitsRG11->data.selectorsR4 = ulliRedPacked >> 8;
+		m_pencodingbitsRG11->data.selectorsR5 = ulliRedPacked;
+
+		// green channel, same layout as red
+		m_pencodingbitsRG11->data.tableIndexG = m_uiGrnModifierTableIndex;
+		m_pencodingbitsRG11->data.multiplierG = (unsigned char)roundf(m_fGrnMultiplier);
+
+		m_pencodingbitsRG11->data.selectorsG0 = ulliGrnPacked >> 40;
+		m_pencodingbitsRG11->data.selectorsG1 = ulliGrnPacked >> 32;
+		m_pencodingbitsRG11->data.selectorsG2 = ulliGrnPacked >> 24;
+		m_pencodingbitsRG11->data.selectorsG3 = ulliGrnPacked >> 16;
+		m_pencodingbitsRG11->data.selectorsG4 = ulliGrnPacked >> 8;
+		m_pencodingbitsRG11->data.selectorsG5 = ulliGrnPacked;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h
new file mode 100644
index 0000000..4caac1d
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RG11.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcBlock4x4Encoding_R11.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits_RG11;
+
+	// ################################################################################
+	// Block4x4Encoding_RG11
+	// ################################################################################
+
+	class Block4x4Encoding_RG11 : public Block4x4Encoding_R11
+	{
+		// green channel encoding state (private by default);
+		// the red channel state is inherited from Block4x4Encoding_R11
+		float m_fGrnBase;
+		float m_fGrnMultiplier;
+		float m_fGrnBlockError;
+		unsigned int m_auiGrnSelectors[PIXELS];
+		unsigned int m_uiGrnModifierTableIndex;
+
+	public:
+
+		Block4x4Encoding_RG11(void);
+		virtual ~Block4x4Encoding_RG11(void);
+
+		// initialization prior to encoding
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric);
+
+		// initialization from the encoding bits of a previous encoding
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+											ErrorMetric a_errormetric);
+
+		// perform a single encoding iteration
+		virtual void PerformIteration(float a_fEffort);
+
+		// set the encoding bits based on encoding state
+		virtual void SetEncodingBits(void);
+
+		// encoding bits written by SetEncodingBits()
+		Block4x4EncodingBits_RG11 *m_pencodingbitsRG11;
+
+		// find the best combination of green base, multiplier and selectors
+		void CalculateG11(unsigned int a_uiSelectorsUsed,
+							float a_fBaseRadius, float a_fMultiplierRadius);
+
+		// read-only accessors for the green channel encoding state
+
+		inline float GetGrnBase(void) const { return m_fGrnBase; }
+
+		inline float GetGrnMultiplier(void) const { return m_fGrnMultiplier; }
+
+		inline int GetGrnTableIndex(void) const { return m_uiGrnModifierTableIndex; }
+
+		inline const unsigned int * GetGrnSelectors(void) const { return m_auiGrnSelectors; }
+
+
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp
new file mode 100644
index 0000000..6e55c46
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.cpp
@@ -0,0 +1,1727 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RGB8.cpp
+
+Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targeting file format RGB8.
+This encoder is also used for the ETC2 subset of file format RGBA8.
+
+Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8.
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+#include "EtcMath.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+	float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] =	// presumably the distances used by the T and H modes - confirm against DecodePixels_T/H
+	{
+		3.0f / 255.0f,		// each entry is an 8 bit distance scaled to the 0..1 float range
+		6.0f / 255.0f,
+		11.0f / 255.0f,
+		16.0f / 255.0f,
+		23.0f / 255.0f,
+		32.0f / 255.0f,
+		41.0f / 255.0f,
+		64.0f / 255.0f
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void)
+	{
+		// no encoding bits are attached yet; InitFromEncodingBits() assigns them
+		m_pencodingbitsRGB8 = nullptr;
+
+	}
+
+	Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {}	// nothing is released here
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+		// start from the ETC1 interpretation of the bits
+		Block4x4Encoding_ETC1::InitFromEncodingBits(a_pblockParent, a_paucEncodingBits,
+													a_pafrgbaSource, a_errormetric);
+
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(a_paucEncodingBits);
+
+		// T, H and Planar are only looked for when the diff bit is set
+		if (!m_pencodingbitsRGB8->differential.diff)
+		{
+			return;
+		}
+
+		// second color of the differential layout:
+		// each component is the first color's component plus its delta
+		const int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 +
+							(int)m_pencodingbitsRGB8->differential.dred2;
+		const int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 +
+							(int)m_pencodingbitsRGB8->differential.dgreen2;
+		const int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 +
+							(int)m_pencodingbitsRGB8->differential.dblue2;
+
+		// a sum outside 0..31 selects a mode;
+		// red is checked first, then green, then blue
+		if (iRed2 < 0 || iRed2 > 31)
+		{
+			InitFromEncodingBits_T();
+		}
+		else if (iGreen2 < 0 || iGreen2 > 31)
+		{
+			InitFromEncodingBits_H();
+		}
+		else if (iBlue2 < 0 || iBlue2 > 31)
+		{
+			InitFromEncodingBits_Planar();
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if T mode is detected
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void)
+	{
+		const auto &bitsT = m_pencodingbitsRGB8->t;
+
+		m_mode = MODE_T;
+
+		// first color: red is split across the red1a and red1b fields
+		unsigned char ucFirstRed = (unsigned char)((bitsT.red1a << 2) + bitsT.red1b);
+		unsigned char ucFirstGreen = bitsT.green1;
+		unsigned char ucFirstBlue = bitsT.blue1;
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucFirstRed, ucFirstGreen, ucFirstBlue);
+
+		// second color: every component is stored in a single field
+		unsigned char ucSecondRed = bitsT.red2;
+		unsigned char ucSecondGreen = bitsT.green2;
+		unsigned char ucSecondBlue = bitsT.blue2;
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucSecondRed, ucSecondGreen, ucSecondBlue);
+
+		// m_uiCW1 is assembled from the da and db fields
+		m_uiCW1 = (bitsT.da << 1) + bitsT.db;
+
+		// selectors use the ETC1 reader; then decode the pixels and compute the block error
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+		DecodePixels_T();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if H mode is detected
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void)
+	{
+		const auto &bitsH = m_pencodingbitsRGB8->h;
+
+		m_mode = MODE_H;
+
+		// first color: green and blue are split across several fields
+		unsigned char ucFirstRed = bitsH.red1;
+		unsigned char ucFirstGreen = (unsigned char)((bitsH.green1a << 1) + bitsH.green1b);
+		unsigned char ucFirstBlue = (unsigned char)((bitsH.blue1a << 3) + (bitsH.blue1b << 1) +
+													bitsH.blue1c);
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucFirstRed, ucFirstGreen, ucFirstBlue);
+
+		// second color: only green is split
+		unsigned char ucSecondRed = bitsH.red2;
+		unsigned char ucSecondGreen = (unsigned char)((bitsH.green2a << 1) + bitsH.green2b);
+		unsigned char ucSecondBlue = bitsH.blue2;
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucSecondRed, ucSecondGreen, ucSecondBlue);
+
+		// the LSB of m_uiCW1 is not read from a field: it is set when the first color,
+		// packed as one RGB value, is greater than or equal to the second
+		const unsigned int uiPacked1 = (unsigned int)(((int)ucFirstRed << 16) + ((int)ucFirstGreen << 8) + (int)ucFirstBlue);
+		const unsigned int uiPacked2 = (unsigned int)(((int)ucSecondRed << 16) + ((int)ucSecondGreen << 8) + (int)ucSecondBlue);
+
+		m_uiCW1 = (bitsH.da << 2) + (bitsH.db << 1);
+		if (uiPacked1 >= uiPacked2)
+		{
+			m_uiCW1++;
+		}
+
+		// selectors use the ETC1 reader; then decode the pixels and compute the block error
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+		DecodePixels_H();
+
+		CalcBlockError();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if Planar mode is detected
+	//
+	void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void)
+	{
+		const auto &bitsPlanar = m_pencodingbitsRGB8->planar;
+
+		m_mode = MODE_PLANAR;
+
+		// three colors (origin, horizontal, vertical) are read and converted with
+		// ConvertFromR6G7B6(); several components are split across multiple fields
+
+		// origin color
+		unsigned char ucRedO = bitsPlanar.originRed;
+		unsigned char ucGreenO = (unsigned char)((bitsPlanar.originGreen1 << 6) + bitsPlanar.originGreen2);
+		unsigned char ucBlueO = (unsigned char)((bitsPlanar.originBlue1 << 5) + (bitsPlanar.originBlue2 << 3) +
+												(bitsPlanar.originBlue3 << 1) + bitsPlanar.originBlue4);
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucRedO, ucGreenO, ucBlueO);
+
+		// horizontal color
+		unsigned char ucRedH = (unsigned char)((bitsPlanar.horizRed1 << 1) + bitsPlanar.horizRed2);
+		unsigned char ucGreenH = bitsPlanar.horizGreen;
+		unsigned char ucBlueH = (unsigned char)((bitsPlanar.horizBlue1 << 5) + bitsPlanar.horizBlue2);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucRedH, ucGreenH, ucBlueH);
+
+		// vertical color
+		unsigned char ucRedV = (unsigned char)((bitsPlanar.vertRed1 << 3) + bitsPlanar.vertRed2);
+		unsigned char ucGreenV = (unsigned char)((bitsPlanar.vertGreen1 << 2) + bitsPlanar.vertGreen2);
+		unsigned char ucBlueV = bitsPlanar.vertBlue;
+		m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucRedV, ucGreenV, ucBlueV);
+
+		// finish with the planar decode and the block error calculation
+		DecodePixels_Planar();
+
+		CalcBlockError();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+		// m_uiEncodingIterations selects which step runs on this call
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			Block4x4Encoding_ETC1::PerformFirstIteration();
+			if (m_boolDone)
+			{
+				break;
+			}
+			TryPlanar(0);
+			SetDoneIfPerfect();
+			if (m_boolDone)
+			{
+				break;
+			}
+			TryTAndH(0);
+			break;
+
+		case 1:
+			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1);
+			break;
+
+		case 3:
+			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);	// same as case 1 with the opposite flip
+			break;
+
+		case 4:
+			Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1);	// same as case 2 with the opposite flip
+			break;
+
+		case 5:
+			TryPlanar(1);
+			if (a_fEffort <= 49.5f)	// from here on, each step is the last one below its effort threshold
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 6:
+			TryTAndH(1);
+			if (a_fEffort <= 59.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 7:
+			Block4x4Encoding_ETC1::TryDegenerates1();
+			if (a_fEffort <= 69.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 8:
+			Block4x4Encoding_ETC1::TryDegenerates2();
+			if (a_fEffort <= 79.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 9:
+			Block4x4Encoding_ETC1::TryDegenerates3();
+			if (a_fEffort <= 89.5f)
+			{
+				m_boolDone = true;
+			}
+			break;
+
+		case 10:
+			Block4x4Encoding_ETC1::TryDegenerates4();
+			m_boolDone = true;	// last step: always finished
+			break;
+
+		default:
+			assert(0);	// no step is defined beyond 10
+			break;
+		}
+
+		m_uiEncodingIterations++;
+		// a perfect encoding ends the search regardless of effort
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in Planar mode
+	// save this encoding if it improves the error
+	//
+	void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius)	// a_uiRadius: 0 = regression result only, non-zero = also twiddle
+	{
+		Block4x4Encoding_RGB8 encodingTry = *this;	// scratch copy; *this is only touched if the try turns out better
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_PLANAR;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+		}
+
+		encodingTry.CalculatePlanarCornerColors();	// fills m_frgbaColor1/2/3 of the try from the source pixels
+
+		encodingTry.DecodePixels_Planar();
+
+		encodingTry.CalcBlockError();	// presumably refreshes encodingTry.m_fError for the decoded pixels - defined elsewhere
+
+		if (a_uiRadius > 0)	// only zero vs. non-zero matters; the value itself is not passed on
+		{
+			encodingTry.TwiddlePlanar();	// refines encodingTry in place; its return value is not needed here
+		}
+
+		if (encodingTry.m_fError < m_fError)	// adopt the try only on a strict improvement
+		{
+			m_mode = MODE_PLANAR;
+			m_boolDiff = true;
+			m_boolFlip = false;
+			m_frgbaColor1 = encodingTry.m_frgbaColor1;
+			m_frgbaColor2 = encodingTry.m_frgbaColor2;
+			m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];	// selectors are not copied on this path
+			}
+
+			m_fError = encodingTry.m_fError;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in T mode or H mode
+	// save this encoding if it improves the error
+	//
+	void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius)	// a_uiRadius: forwarded unchanged to TryT and TryH
+	{
+
+		CalculateBaseColorsForTAndH();	// must run first: both searches start from the two base colors it stores
+
+		TryT(a_uiRadius);	// may replace the current encoding with a better T-mode one
+
+		TryH(a_uiRadius);	// may replace the current encoding (including a T result) with a better H-mode one
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// calculate original values for base colors
+	// store them in m_frgbaOriginalColor1_TAndH and m_frgbaOriginalColor2_TAndH
+	//
+	void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void)	// writes m_frgbaOriginalColor1_TAndH and m_frgbaOriginalColor2_TAndH
+	{
+
+		ColorFloatRGBA frgbaBlockAverage = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f;	// mean of the two stored source averages
+
+		// find the non-transparent pixel farthest from the block average, as measured by CalcGrayDistance2
+		unsigned int uiFarthestPixel = 0;	// stays 0 if no pixel has a distance above zero
+		float fFarthestGrayDistance2 = 0.0f;
+		unsigned int uiTransparentPixels = 0;	// counted only for the assert below
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			// don't count transparent
+			if (m_pafrgbaSource[uiPixel].fA == 0.0f)
+			{
+				uiTransparentPixels++;
+			}
+			else
+			{
+				float fGrayDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaBlockAverage);	// squared-distance metric defined elsewhere
+
+				if (fGrayDistance2 > fFarthestGrayDistance2)	// strict '>' keeps the earliest pixel on ties
+				{
+					uiFarthestPixel = uiPixel;
+					fFarthestGrayDistance2 = fGrayDistance2;
+				}
+			}
+		}
+		// a transparent block should not reach this method
+		assert(uiTransparentPixels < PIXELS);
+
+		// set the original base colors to:
+		//		half way to the farthest pixel and
+		//		the mirror color on the other side of the average
+		ColorFloatRGBA frgbaOffset = (m_pafrgbaSource[uiFarthestPixel] - frgbaBlockAverage) * 0.5f;
+		m_frgbaOriginalColor1_TAndH = (frgbaBlockAverage + frgbaOffset).QuantizeR4G4B4();	// lies between two source-derived colors, so no clamp is applied
+		m_frgbaOriginalColor2_TAndH = (frgbaBlockAverage - frgbaOffset).ClampRGB().QuantizeR4G4B4();	// the "other side" might be out of range
+
+		// move base colors to find best fit
+		for (unsigned int uiIteration = 0; uiIteration < 10; uiIteration++)	// at most 10 refinement passes
+		{
+			// find the center of pixels closest to each color
+			float fPixelsCloserToColor1 = 0.0f;	// sum of alpha weights, not a pixel count
+			ColorFloatRGBA frgbSumPixelsCloserToColor1;	// NOTE(review): relies on the default constructor yielding a zero color - confirm
+			float fPixelsCloserToColor2 = 0.0f;	// sum of alpha weights, not a pixel count
+			ColorFloatRGBA frgbSumPixelsCloserToColor2;	// NOTE(review): same zero-initialisation assumption as above
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				// don't count transparent pixels
+				if (m_pafrgbaSource[uiPixel].fA == 0.0f)
+				{
+					continue;
+				}
+
+				float fGrayDistance2ToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH);
+				float fGrayDistance2ToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH);
+
+				ColorFloatRGBA frgbaAlphaWeightedSource = m_pafrgbaSource[uiPixel] * m_pafrgbaSource[uiPixel].fA;	// weight the pixel by its own alpha
+					
+				if (fGrayDistance2ToColor1 <= fGrayDistance2ToColor2)	// ties are assigned to color 1
+				{
+					fPixelsCloserToColor1 += m_pafrgbaSource[uiPixel].fA;
+					frgbSumPixelsCloserToColor1 = frgbSumPixelsCloserToColor1 + frgbaAlphaWeightedSource;
+				}
+				else
+				{
+					fPixelsCloserToColor2 += m_pafrgbaSource[uiPixel].fA;
+					frgbSumPixelsCloserToColor2 = frgbSumPixelsCloserToColor2 + frgbaAlphaWeightedSource;
+				}
+			}
+			if (fPixelsCloserToColor1 == 0.0f || fPixelsCloserToColor2 == 0.0f)	// one group is empty: stop, which also avoids dividing by zero below
+			{
+				break;
+			}
+
+			ColorFloatRGBA frgbAvgColor1Pixels = (frgbSumPixelsCloserToColor1 * (1.0f / fPixelsCloserToColor1)).QuantizeR4G4B4();	// alpha-weighted mean of group 1
+			ColorFloatRGBA frgbAvgColor2Pixels = (frgbSumPixelsCloserToColor2 * (1.0f / fPixelsCloserToColor2)).QuantizeR4G4B4();	// alpha-weighted mean of group 2
+
+			if (frgbAvgColor1Pixels.fR == m_frgbaOriginalColor1_TAndH.fR &&	// both quantized means unchanged: converged
+				frgbAvgColor1Pixels.fG == m_frgbaOriginalColor1_TAndH.fG &&
+				frgbAvgColor1Pixels.fB == m_frgbaOriginalColor1_TAndH.fB &&
+				frgbAvgColor2Pixels.fR == m_frgbaOriginalColor2_TAndH.fR &&
+				frgbAvgColor2Pixels.fG == m_frgbaOriginalColor2_TAndH.fG &&
+				frgbAvgColor2Pixels.fB == m_frgbaOriginalColor2_TAndH.fB)
+			{
+				break;
+			}
+
+			m_frgbaOriginalColor1_TAndH = frgbAvgColor1Pixels;	// use the new means as base colors for the next pass
+			m_frgbaOriginalColor2_TAndH = frgbAvgColor2Pixels;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in T mode
+	// save this encoding if it improves the error
+	//
+	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
+	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
+	//
+	void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius)	// a_uiRadius: +/- search range, in 4-bit steps, around each base color channel
+	{
+		Block4x4Encoding_RGB8 encodingTry = *this;	// scratch copy; *this is only overwritten on a strict improvement
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_T;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;	// so the first selector search always records a result in the try
+		}
+
+		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);	// base color 1 as 4-bit channel values
+		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+
+		int iMinRed1 = iColor1Red - (int)a_uiRadius;
+		if (iMinRed1 < 0)
+		{
+			iMinRed1 = 0;
+		}
+		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
+		if (iMaxRed1 > 15)
+		{
+			iMaxRed1 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
+		if (iMinGreen1 < 0)
+		{
+			iMinGreen1 = 0;
+		}
+		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
+		if (iMaxGreen1 > 15)
+		{
+			iMaxGreen1 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
+		if (iMinBlue1 < 0)
+		{
+			iMinBlue1 = 0;
+		}
+		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
+		if (iMaxBlue1 > 15)
+		{
+			iMaxBlue1 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);	// base color 2 as 4-bit channel values
+		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		int iMinRed2 = iColor2Red - (int)a_uiRadius;
+		if (iMinRed2 < 0)
+		{
+			iMinRed2 = 0;
+		}
+		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
+		if (iMaxRed2 > 15)
+		{
+			iMaxRed2 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
+		if (iMinGreen2 < 0)
+		{
+			iMinGreen2 = 0;
+		}
+		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
+		if (iMaxGreen2 > 15)
+		{
+			iMaxGreen2 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
+		if (iMinBlue2 < 0)
+		{
+			iMinBlue2 = 0;
+		}
+		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
+		if (iMaxBlue2 > 15)
+		{
+			iMaxBlue2 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)	// try every distance-table index
+		{
+			encodingTry.m_uiCW1 = uiDistance;	// read by TryT_BestSelectorCombination as the table index
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
+			//
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)	// try both assignments of the pair to color1/color2
+						{
+							if (uiBaseColorSwaps == 0)
+							{
+								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
+								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+							}
+							else
+							{
+								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
+							}
+
+							encodingTry.TryT_BestSelectorCombination();	// lowers encodingTry.m_fError only if this candidate beats the try's best so far
+
+							if (encodingTry.m_fError < m_fError)	// true only right after the try improved past the current encoding
+							{
+								m_mode = encodingTry.m_mode;
+								m_boolDiff = encodingTry.m_boolDiff;
+								m_boolFlip = encodingTry.m_boolFlip;
+
+								m_frgbaColor1 = encodingTry.m_frgbaColor1;
+								m_frgbaColor2 = encodingTry.m_frgbaColor2;
+								m_uiCW1 = encodingTry.m_uiCW1;
+
+								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+								{
+									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+								}
+
+								m_fError = encodingTry.m_fError;
+							}
+						}
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)	// try both assignments of the pair to color1/color2
+						{
+							if (uiBaseColorSwaps == 0)
+							{
+								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
+							}
+							else
+							{
+								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
+								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+							}
+
+							encodingTry.TryT_BestSelectorCombination();	// lowers encodingTry.m_fError only if this candidate beats the try's best so far
+
+							if (encodingTry.m_fError < m_fError)	// true only right after the try improved past the current encoding
+							{
+								m_mode = encodingTry.m_mode;
+								m_boolDiff = encodingTry.m_boolDiff;
+								m_boolFlip = encodingTry.m_boolFlip;
+
+								m_frgbaColor1 = encodingTry.m_frgbaColor1;
+								m_frgbaColor2 = encodingTry.m_frgbaColor2;
+								m_uiCW1 = encodingTry.m_uiCW1;
+
+								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+								{
+									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+								}
+
+								m_fError = encodingTry.m_fError;
+							}
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryT
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void)	// reads m_frgbaColor1, m_frgbaColor2 and m_uiCW1 of the object it is called on
+	{
+
+		float fDistance = s_afTHDistanceTable[m_uiCW1];	// m_uiCW1 is the distance-table index set by the caller
+
+		unsigned int auiBestPixelSelectors[PIXELS];	// NOTE(review): an entry stays unset only if no error for that pixel compares below FLT_MAX
+		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
+		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];	// the candidate color each selector value decodes to
+		
+		assert(SELECTORS == 4);	// the four assignments below assume exactly four selectors
+		afrgbaDecodedPixel[0] = m_frgbaColor1;	// selector 0: color1, unaffected by the distance
+		afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB();	// selector 1: color2 plus the distance
+		afrgbaDecodedPixel[2] = m_frgbaColor2;	// selector 2: color2 itself
+		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();	// selector 3: color2 minus the distance
+		
+		// try each selector
+		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)	// each pixel picks its selector independently
+			{
+
+				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
+														m_pafrgbaSource[uiPixel]);
+
+				if (fPixelError < afBestPixelErrors[uiPixel])	// strict '<' keeps the lowest selector index on ties
+				{
+					afBestPixelErrors[uiPixel] = fPixelError;
+					auiBestPixelSelectors[uiPixel] = uiSelector;
+					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
+				}
+			}
+		}
+		
+
+		// add up all of the pixel errors
+		float fBlockError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			fBlockError += afBestPixelErrors[uiPixel];
+		}
+
+		if (fBlockError < m_fError)	// commit only if better than this object's best so far; otherwise nothing is changed
+		{
+			m_fError = fBlockError;
+
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
+				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in H mode
+	// save this encoding if it improves the error
+	//
+	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
+	// TWIDDLE_RADIUS of 2 is WAY too slow
+	//
+	void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius)	// a_uiRadius: +/- search range, in 4-bit steps, around each base color channel
+	{
+		Block4x4Encoding_RGB8 encodingTry = *this;	// scratch copy; *this is only overwritten on a strict improvement
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_H;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;	// so the first selector search always records a result in the try
+		}
+
+		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);	// base color 1 as 4-bit channel values
+		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+
+		int iMinRed1 = iColor1Red - (int)a_uiRadius;
+		if (iMinRed1 < 0)
+		{
+			iMinRed1 = 0;
+		}
+		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
+		if (iMaxRed1 > 15)
+		{
+			iMaxRed1 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
+		if (iMinGreen1 < 0)
+		{
+			iMinGreen1 = 0;
+		}
+		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
+		if (iMaxGreen1 > 15)
+		{
+			iMaxGreen1 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
+		if (iMinBlue1 < 0)
+		{
+			iMinBlue1 = 0;
+		}
+		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
+		if (iMaxBlue1 > 15)
+		{
+			iMaxBlue1 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);	// base color 2 as 4-bit channel values
+		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		int iMinRed2 = iColor2Red - (int)a_uiRadius;
+		if (iMinRed2 < 0)
+		{
+			iMinRed2 = 0;
+		}
+		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
+		if (iMaxRed2 > 15)
+		{
+			iMaxRed2 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
+		if (iMinGreen2 < 0)
+		{
+			iMinGreen2 = 0;
+		}
+		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
+		if (iMaxGreen2 > 15)
+		{
+			iMaxGreen2 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
+		if (iMinBlue2 < 0)
+		{
+			iMinBlue2 = 0;
+		}
+		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
+		if (iMaxBlue2 > 15)
+		{
+			iMaxBlue2 = 15;	// clamp the upper bound itself to the largest 4-bit value
+		}
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)	// try every distance-table index
+		{
+			encodingTry.m_uiCW1 = uiDistance;	// read by TryH_BestSelectorCombination as the table index
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;	// color2 stays at its base value while color1 varies
+
+						// if color1 == color2, H encoding issues can pop up, so skip this candidate
+						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
+						{
+							continue;
+						}
+
+						encodingTry.TryH_BestSelectorCombination();	// lowers encodingTry.m_fError only if this candidate beats the try's best so far
+
+						if (encodingTry.m_fError < m_fError)	// true only right after the try improved past the current encoding
+						{
+							m_mode = encodingTry.m_mode;
+							m_boolDiff = encodingTry.m_boolDiff;
+							m_boolFlip = encodingTry.m_boolFlip;
+
+							m_frgbaColor1 = encodingTry.m_frgbaColor1;
+							m_frgbaColor2 = encodingTry.m_frgbaColor2;
+							m_uiCW1 = encodingTry.m_uiCW1;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+							}
+
+							m_fError = encodingTry.m_fError;
+						}
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;	// color1 stays at its base value while color2 varies
+						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+
+						// if color1 == color2, H encoding issues can pop up, so skip this candidate
+						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
+						{
+							continue;
+						}
+
+						encodingTry.TryH_BestSelectorCombination();	// lowers encodingTry.m_fError only if this candidate beats the try's best so far
+
+						if (encodingTry.m_fError < m_fError)	// true only right after the try improved past the current encoding
+						{
+							m_mode = encodingTry.m_mode;
+							m_boolDiff = encodingTry.m_boolDiff;
+							m_boolFlip = encodingTry.m_boolFlip;
+
+							m_frgbaColor1 = encodingTry.m_frgbaColor1;
+							m_frgbaColor2 = encodingTry.m_frgbaColor2;
+							m_uiCW1 = encodingTry.m_uiCW1;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+							}
+
+							m_fError = encodingTry.m_fError;
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryH
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void)	// reads m_frgbaColor1, m_frgbaColor2 and m_uiCW1 of the object it is called on
+	{
+
+		float fDistance = s_afTHDistanceTable[m_uiCW1];	// m_uiCW1 is the distance-table index set by the caller
+
+		unsigned int auiBestPixelSelectors[PIXELS];	// NOTE(review): an entry stays unset only if no error for that pixel compares below FLT_MAX
+		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
+		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];	// the candidate color each selector value decodes to
+		
+		assert(SELECTORS == 4);	// the four assignments below assume exactly four selectors
+		afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB();	// selector 0: color1 plus the distance
+		afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB();	// selector 1: color1 minus the distance
+		afrgbaDecodedPixel[2] = (m_frgbaColor2 + fDistance).ClampRGB();	// selector 2: color2 plus the distance
+		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();	// selector 3: color2 minus the distance
+		
+		// try each selector
+		for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)	// each pixel picks its selector independently
+			{
+
+				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
+														m_pafrgbaSource[uiPixel]);
+
+				if (fPixelError < afBestPixelErrors[uiPixel])	// strict '<' keeps the lowest selector index on ties
+				{
+					afBestPixelErrors[uiPixel] = fPixelError;
+					auiBestPixelSelectors[uiPixel] = uiSelector;
+					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
+				}
+			}
+		}
+		
+
+		// add up all of the pixel errors
+		float fBlockError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			fBlockError += afBestPixelErrors[uiPixel];
+		}
+
+		if (fBlockError < m_fError)	// commit only if better than this object's best so far; otherwise nothing is changed
+		{
+			m_fError = fBlockError;
+
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
+				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// use linear regression to find the best fit for colors along the edges of the 4x4 block
+	//
+	void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void)	// writes m_frgbaColor1, m_frgbaColor2 and m_frgbaColor3
+	{
+		ColorFloatRGBA afrgbaRegression[MAX_PLANAR_REGRESSION_SIZE];	// only the first 4 entries are used here
+		ColorFloatRGBA frgbaSlope;	// per-step change returned by ColorRegression
+		ColorFloatRGBA frgbaOffset;	// fitted value at the first sample, as used below
+
+		// top edge (source indices 0, 4, 8, 12)
+		afrgbaRegression[0] = m_pafrgbaSource[0];
+		afrgbaRegression[1] = m_pafrgbaSource[4];
+		afrgbaRegression[2] = m_pafrgbaSource[8];
+		afrgbaRegression[3] = m_pafrgbaSource[12];
+		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
+		m_frgbaColor1 = frgbaOffset;	// first estimate of the color at the start of the top edge
+		m_frgbaColor2 = (frgbaSlope * 4.0f) + frgbaOffset;	// fit extrapolated 4 steps, i.e. one step past the last sample
+
+		// left edge (source indices 0, 1, 2, 3)
+		afrgbaRegression[0] = m_pafrgbaSource[0];
+		afrgbaRegression[1] = m_pafrgbaSource[1];
+		afrgbaRegression[2] = m_pafrgbaSource[2];
+		afrgbaRegression[3] = m_pafrgbaSource[3];
+		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
+		m_frgbaColor1 = (m_frgbaColor1 + frgbaOffset) * 0.5f;		// average with top edge
+		m_frgbaColor3 = (frgbaSlope * 4.0f) + frgbaOffset;	// fit extrapolated 4 steps, i.e. one step past the last sample
+
+		// right edge (source indices 12, 13, 14, 15)
+		afrgbaRegression[0] = m_pafrgbaSource[12];
+		afrgbaRegression[1] = m_pafrgbaSource[13];
+		afrgbaRegression[2] = m_pafrgbaSource[14];
+		afrgbaRegression[3] = m_pafrgbaSource[15];
+		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
+		m_frgbaColor2 = (m_frgbaColor2 + frgbaOffset) * 0.5f;		// average with top edge
+
+		// bottom edge (source indices 3, 7, 11, 15)
+		afrgbaRegression[0] = m_pafrgbaSource[3];
+		afrgbaRegression[1] = m_pafrgbaSource[7];
+		afrgbaRegression[2] = m_pafrgbaSource[11];
+		afrgbaRegression[3] = m_pafrgbaSource[15];
+		ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset);
+		m_frgbaColor3 = (m_frgbaColor3 + frgbaOffset) * 0.5f;		// average with left edge
+
+		// quantize corner colors to 6/7/6
+		m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6();	// NOTE(review): no explicit clamp here; presumably QuantizeR6G7B6 handles out-of-range extrapolations - confirm
+		m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6();
+		m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing R, G and B independently
+	//
+	// R, G and B decoding and errors are independent, so R, G and B twiddles can be independent
+	//
+	// return true if improvement
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanar(void)	// returns true if any channel pass lowered the error
+	{
+		bool boolImprovement = false;
+
+		while (TwiddlePlanarR())	// repeat until a red pass finds nothing better; each pass re-centres on the current colors
+		{
+			boolImprovement = true;
+		}
+
+		while (TwiddlePlanarG())	// same for green; red is not revisited afterwards
+		{
+			boolImprovement = true;
+		}
+
+		while (TwiddlePlanarB())	// same for blue
+		{
+			boolImprovement = true;
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing R
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanarR()	// returns true if at least one candidate was adopted into *this
+	{
+		bool boolImprovement = false;
+
+		Block4x4Encoding_RGB8 encodingTry = *this;	// scratch copy; only its red channels are varied below
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_PLANAR;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+		}
+
+		int iOriginRed = encodingTry.m_frgbaColor1.IntRed(63.0f);	// search centres as 6-bit values; fixed for the whole call
+		int iHorizRed = encodingTry.m_frgbaColor2.IntRed(63.0f);
+		int iVertRed = encodingTry.m_frgbaColor3.IntRed(63.0f);
+
+		for (int iTryOriginRed = iOriginRed - 1; iTryOriginRed <= iOriginRed + 1; iTryOriginRed++)	// up to 3 x 3 x 3 candidates, +/-1 per corner
+		{
+			// check for out of range
+			if (iTryOriginRed < 0 || iTryOriginRed > 63)
+			{
+				continue;
+			}
+
+			encodingTry.m_frgbaColor1.fR = ((iTryOriginRed << 2) + (iTryOriginRed >> 4)) / 255.0f;	// 6 -> 8 bits by repeating the top 2 bits, then divide by 255
+
+			for (int iTryHorizRed = iHorizRed - 1; iTryHorizRed <= iHorizRed + 1; iTryHorizRed++)
+			{
+				// check for out of range
+				if (iTryHorizRed < 0 || iTryHorizRed > 63)
+				{
+					continue;
+				}
+
+				encodingTry.m_frgbaColor2.fR = ((iTryHorizRed << 2) + (iTryHorizRed >> 4)) / 255.0f;	// same 6 -> 8 bit expansion
+
+				for (int iTryVertRed = iVertRed - 1; iTryVertRed <= iVertRed + 1; iTryVertRed++)
+				{
+					// check for out of range
+					if (iTryVertRed < 0 || iTryVertRed > 63)
+					{
+						continue;
+					}
+
+					// don't bother with null twiddle
+					if (iTryOriginRed == iOriginRed && iTryHorizRed == iHorizRed && iTryVertRed == iVertRed)
+					{
+						continue;
+					}
+
+					encodingTry.m_frgbaColor3.fR = ((iTryVertRed << 2) + (iTryVertRed >> 4)) / 255.0f;	// same 6 -> 8 bit expansion
+
+					encodingTry.DecodePixels_Planar();
+
+					encodingTry.CalcBlockError();	// presumably refreshes encodingTry.m_fError - defined elsewhere
+
+					if (encodingTry.m_fError < m_fError)	// adopt on strict improvement and keep scanning the remaining candidates
+					{
+						m_mode = MODE_PLANAR;
+						m_boolDiff = true;
+						m_boolFlip = false;
+						m_frgbaColor1 = encodingTry.m_frgbaColor1;
+						m_frgbaColor2 = encodingTry.m_frgbaColor2;
+						m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+						}
+
+						m_fError = encodingTry.m_fError;
+
+						boolImprovement = true;
+					}
+				}
+			}
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing G
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanarG()	// returns true if at least one candidate was adopted into *this
+	{
+		bool boolImprovement = false;
+
+		Block4x4Encoding_RGB8 encodingTry = *this;	// scratch copy; only its green channels are varied below
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_PLANAR;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+		}
+
+		int iOriginGreen = encodingTry.m_frgbaColor1.IntGreen(127.0f);	// search centres as 7-bit values; fixed for the whole call
+		int iHorizGreen = encodingTry.m_frgbaColor2.IntGreen(127.0f);
+		int iVertGreen = encodingTry.m_frgbaColor3.IntGreen(127.0f);
+
+		for (int iTryOriginGreen = iOriginGreen - 1; iTryOriginGreen <= iOriginGreen + 1; iTryOriginGreen++)	// up to 3 x 3 x 3 candidates, +/-1 per corner
+		{
+			// check for out of range
+			if (iTryOriginGreen < 0 || iTryOriginGreen > 127)
+			{
+				continue;
+			}
+
+			encodingTry.m_frgbaColor1.fG = ((iTryOriginGreen << 1) + (iTryOriginGreen >> 6)) / 255.0f;	// 7 -> 8 bits by repeating the top bit, then divide by 255
+
+			for (int iTryHorizGreen = iHorizGreen - 1; iTryHorizGreen <= iHorizGreen + 1; iTryHorizGreen++)
+			{
+				// check for out of range
+				if (iTryHorizGreen < 0 || iTryHorizGreen > 127)
+				{
+					continue;
+				}
+
+				encodingTry.m_frgbaColor2.fG = ((iTryHorizGreen << 1) + (iTryHorizGreen >> 6)) / 255.0f;	// same 7 -> 8 bit expansion
+
+				for (int iTryVertGreen = iVertGreen - 1; iTryVertGreen <= iVertGreen + 1; iTryVertGreen++)
+				{
+					// check for out of range
+					if (iTryVertGreen < 0 || iTryVertGreen > 127)
+					{
+						continue;
+					}
+
+					// don't bother with null twiddle
+					if (iTryOriginGreen == iOriginGreen && 
+						iTryHorizGreen == iHorizGreen && 
+						iTryVertGreen == iVertGreen)
+					{
+						continue;
+					}
+
+					encodingTry.m_frgbaColor3.fG = ((iTryVertGreen << 1) + (iTryVertGreen >> 6)) / 255.0f;	// same 7 -> 8 bit expansion
+
+					encodingTry.DecodePixels_Planar();
+
+					encodingTry.CalcBlockError();	// presumably refreshes encodingTry.m_fError - defined elsewhere
+
+					if (encodingTry.m_fError < m_fError)	// adopt on strict improvement and keep scanning the remaining candidates
+					{
+						m_mode = MODE_PLANAR;
+						m_boolDiff = true;
+						m_boolFlip = false;
+						m_frgbaColor1 = encodingTry.m_frgbaColor1;
+						m_frgbaColor2 = encodingTry.m_frgbaColor2;
+						m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+						}
+
+						m_fError = encodingTry.m_fError;
+
+						boolImprovement = true;
+					}
+				}
+			}
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try different corner colors by slightly changing B
+	//
+	bool Block4x4Encoding_RGB8::TwiddlePlanarB()	// returns true if at least one candidate was adopted into *this
+	{
+		bool boolImprovement = false;
+
+		Block4x4Encoding_RGB8 encodingTry = *this;	// scratch copy; only its blue channels are varied below
+
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_PLANAR;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+		}
+
+		int iOriginBlue = encodingTry.m_frgbaColor1.IntBlue(63.0f);	// search centres as 6-bit values; fixed for the whole call
+		int iHorizBlue = encodingTry.m_frgbaColor2.IntBlue(63.0f);
+		int iVertBlue = encodingTry.m_frgbaColor3.IntBlue(63.0f);
+
+		for (int iTryOriginBlue = iOriginBlue - 1; iTryOriginBlue <= iOriginBlue + 1; iTryOriginBlue++)	// up to 3 x 3 x 3 candidates, +/-1 per corner
+		{
+			// check for out of range
+			if (iTryOriginBlue < 0 || iTryOriginBlue > 63)
+			{
+				continue;
+			}
+
+			encodingTry.m_frgbaColor1.fB = ((iTryOriginBlue << 2) + (iTryOriginBlue >> 4)) / 255.0f;	// 6 -> 8 bits by repeating the top 2 bits, then divide by 255
+
+			for (int iTryHorizBlue = iHorizBlue - 1; iTryHorizBlue <= iHorizBlue + 1; iTryHorizBlue++)
+			{
+				// check for out of range
+				if (iTryHorizBlue < 0 || iTryHorizBlue > 63)
+				{
+					continue;
+				}
+
+				encodingTry.m_frgbaColor2.fB = ((iTryHorizBlue << 2) + (iTryHorizBlue >> 4)) / 255.0f;	// same 6 -> 8 bit expansion
+
+				for (int iTryVertBlue = iVertBlue - 1; iTryVertBlue <= iVertBlue + 1; iTryVertBlue++)
+				{
+					// check for out of range
+					if (iTryVertBlue < 0 || iTryVertBlue > 63)
+					{
+						continue;
+					}
+
+					// don't bother with null twiddle
+					if (iTryOriginBlue == iOriginBlue && iTryHorizBlue == iHorizBlue && iTryVertBlue == iVertBlue)
+					{
+						continue;
+					}
+
+					encodingTry.m_frgbaColor3.fB = ((iTryVertBlue << 2) + (iTryVertBlue >> 4)) / 255.0f;	// same 6 -> 8 bit expansion
+
+					encodingTry.DecodePixels_Planar();
+
+					encodingTry.CalcBlockError();	// presumably refreshes encodingTry.m_fError - defined elsewhere
+
+					if (encodingTry.m_fError < m_fError)	// adopt on strict improvement and keep scanning the remaining candidates
+					{
+						m_mode = MODE_PLANAR;
+						m_boolDiff = true;
+						m_boolFlip = false;
+						m_frgbaColor1 = encodingTry.m_frgbaColor1;
+						m_frgbaColor2 = encodingTry.m_frgbaColor2;
+						m_frgbaColor3 = encodingTry.m_frgbaColor3;
+
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+						}
+
+						m_fError = encodingTry.m_fError;
+
+						boolImprovement = true;
+					}
+				}
+			}
+		}
+
+		return boolImprovement;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGB8::SetEncodingBits(void)	// dispatches on m_mode to the matching bit writer
+	{
+
+		switch (m_mode)
+		{
+		case MODE_ETC1:	// handled by the ETC1 base implementation
+			Block4x4Encoding_ETC1::SetEncodingBits();
+			break;
+
+		case MODE_T:
+			SetEncodingBits_T();
+			break;
+
+		case MODE_H:
+			SetEncodingBits_H();
+			break;
+
+		case MODE_PLANAR:
+			SetEncodingBits_Planar();
+			break;
+
+		default:	// any other mode is a programming error; with asserts disabled nothing is written
+			assert(false);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// write the T mode encoding state (two base colors, distance index m_uiCW1 and selectors) to the encoding bits
+	// m_mode must be MODE_T and m_boolDiff must be true (both asserted)
+	void Block4x4Encoding_RGB8::SetEncodingBits_T(void)
+	{
+		static const bool SANITY_CHECK = true;
+
+		assert(m_mode == MODE_T);
+		assert(m_boolDiff == true);
+		// quantize both base colors with a scale of 15 (NOTE(review): presumably 4 bits per channel - confirm against IntRed() etc.)
+		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+		// color 1: red is split over two fields, red1a receives uiRed1 >> 2 and red1b receives uiRed1 (presumably each field keeps only its own low bits)
+		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
+		m_pencodingbitsRGB8->t.red1b = uiRed1;
+		m_pencodingbitsRGB8->t.green1 = uiGreen1;
+		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
+		// color 2: every channel is stored in a single field
+		m_pencodingbitsRGB8->t.red2 = uiRed2;
+		m_pencodingbitsRGB8->t.green2 = uiGreen2;
+		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
+		// the distance index is split over da (m_uiCW1 >> 1) and db (m_uiCW1) in the same way as red1
+		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
+		m_pencodingbitsRGB8->t.db = m_uiCW1;
+
+		m_pencodingbitsRGB8->t.diff = 1;
+		// the selectors are written by the ETC1 base class
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+		// create an invalid R differential to trigger T mode:
+		// the bits are read back through the differential layout (presumably a view of the same bytes) and the detect bits are picked so red1 + dred2 leaves 0..31
+		m_pencodingbitsRGB8->t.detect1 = 0;
+		m_pencodingbitsRGB8->t.detect2 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		if (iRed2 >= 4)
+		{
+			m_pencodingbitsRGB8->t.detect1 = 7;
+			m_pencodingbitsRGB8->t.detect2 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->t.detect1 = 0;
+			m_pencodingbitsRGB8->t.detect2 = 1;
+		}
+
+		if (SANITY_CHECK)
+		{
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+
+			// make sure red overflows (the sum must land outside 0..31)
+			assert(iRed2 < 0 || iRed2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// write the H mode encoding state (two base colors, distance index and selectors) to the encoding bits
+	//
+	// the lsb of the distance index (m_uiCW1) is not stored: it is conveyed by the order of the two stored colors,
+	// so the colors (and the selector bits that refer to them) are swapped here when the order would say otherwise
+	//
+	void Block4x4Encoding_RGB8::SetEncodingBits_H(void)
+	{
+		static const bool SANITY_CHECK = true;
+
+		assert(m_mode == MODE_H);
+		assert(m_boolDiff == true);
+
+		// quantize both base colors with a scale of 15
+		const unsigned int uiR1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		const unsigned int uiG1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		const unsigned int uiB1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		const unsigned int uiR2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		const unsigned int uiG2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		const unsigned int uiB2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+
+		// pack each color into a single integer so the two colors can be ordered
+		const unsigned int uiPacked1 = (uiR1 << 16) + (uiG1 << 8) + uiB1;
+		const unsigned int uiPacked2 = (uiR2 << 16) + (uiG2 << 8) + uiB2;
+
+		// swap exactly when (color1 < color2) agrees with the parity of the distance index,
+		// i.e. an odd index stores the larger color first and an even index stores the smaller color first
+		const bool boolOddDistance = m_uiCW1 & 1;
+		const bool boolSwapColors = ((uiPacked1 < uiPacked2) == boolOddDistance);
+
+		// decide which quantized color goes into the first and which into the second color slot
+		const unsigned int uiFirstRed = boolSwapColors ? uiR2 : uiR1;
+		const unsigned int uiFirstGreen = boolSwapColors ? uiG2 : uiG1;
+		const unsigned int uiFirstBlue = boolSwapColors ? uiB2 : uiB1;
+
+		const unsigned int uiSecondRed = boolSwapColors ? uiR1 : uiR2;
+		const unsigned int uiSecondGreen = boolSwapColors ? uiG1 : uiG2;
+		const unsigned int uiSecondBlue = boolSwapColors ? uiB1 : uiB2;
+
+		// first color: green and blue are spread over several fields, each field receiving a shifted copy
+		// (NOTE(review): presumably every field keeps only its own low bits - confirm against the bit-field widths)
+		m_pencodingbitsRGB8->h.red1 = uiFirstRed;
+		m_pencodingbitsRGB8->h.green1a = uiFirstGreen >> 1;
+		m_pencodingbitsRGB8->h.green1b = uiFirstGreen;
+		m_pencodingbitsRGB8->h.blue1a = uiFirstBlue >> 3;
+		m_pencodingbitsRGB8->h.blue1b = uiFirstBlue >> 1;
+		m_pencodingbitsRGB8->h.blue1c = uiFirstBlue;
+
+		// second color
+		m_pencodingbitsRGB8->h.red2 = uiSecondRed;
+		m_pencodingbitsRGB8->h.green2a = uiSecondGreen >> 1;
+		m_pencodingbitsRGB8->h.green2b = uiSecondGreen;
+		m_pencodingbitsRGB8->h.blue2 = uiSecondBlue;
+
+		// only m_uiCW1 >> 2 and m_uiCW1 >> 1 are written; the lsb is carried by the color order chosen above
+		m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
+		m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
+
+		m_pencodingbitsRGB8->h.diff = 1;
+
+		// the selectors are written by the ETC1 base class
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+
+		if (boolSwapColors)
+		{
+			// the colors traded places, so toggle the selector bits under this mask to keep every pixel on its color
+			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
+		}
+
+		// create a valid R differential and an invalid G differential to trigger H mode
+		m_pencodingbitsRGB8->h.detect1 = 0;
+		m_pencodingbitsRGB8->h.detect2 = 0;
+		m_pencodingbitsRGB8->h.detect3 = 0;
+
+		// read the bits written so far back through the differential layout
+		const int iRedSum = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		const int iGreenSum = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+		// red must stay in range: set detect1 when the sum would otherwise fall outside 0..31
+		if (iRedSum < 0 || iRedSum > 31)
+		{
+			m_pencodingbitsRGB8->h.detect1 = 1;
+		}
+
+		// green must leave the range: pick detect2/detect3 depending on where the sum currently sits
+		const bool boolGreenHigh = (iGreenSum >= 4);
+		m_pencodingbitsRGB8->h.detect2 = boolGreenHigh ? 7 : 0;
+		m_pencodingbitsRGB8->h.detect3 = boolGreenHigh ? 0 : 1;
+
+		if (SANITY_CHECK)
+		{
+			// re-read both sums now that the detect bits are in place
+			const int iRedCheck = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+			const int iGreenCheck = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+			// make sure red doesn't overflow and green does
+			assert(iRedCheck >= 0 && iRedCheck <= 31);
+			assert(iGreenCheck < 0 || iGreenCheck > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// write the Planar mode encoding state (origin, horizontal and vertical colors) to the encoding bits
+	// m_mode must be MODE_PLANAR and m_boolDiff must be true (both asserted)
+	void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void)
+	{
+		static const bool SANITY_CHECK = true;
+
+		assert(m_mode == MODE_PLANAR);
+		assert(m_boolDiff == true);
+		// quantize the three colors: red and blue with a scale of 63, green with a scale of 127
+		unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f);
+		unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f);
+		unsigned int uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f);
+
+		unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f);
+		unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f);
+		unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f);
+
+		unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f);
+		unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f);
+		unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f);
+		// origin color (m_frgbaColor1): green and blue are spread over several fields, each receiving a shifted copy (presumably each field keeps only its own low bits)
+		m_pencodingbitsRGB8->planar.originRed = uiOriginRed;
+		m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6;
+		m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen;
+		m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5;
+		m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3;
+		m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1;
+		m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue;
+		// horizontal color (m_frgbaColor2): red and blue are split over two fields each
+		m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1;
+		m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed;
+		m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen;
+		m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5;
+		m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue;
+		// vertical color (m_frgbaColor3): red and green are split over two fields each
+		m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3;
+		m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed;
+		m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2;
+		m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen;
+		m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue;
+
+		m_pencodingbitsRGB8->planar.diff = 1;
+
+		// create valid RG differentials and an invalid B differential to trigger planar mode
+		m_pencodingbitsRGB8->planar.detect1 = 0;
+		m_pencodingbitsRGB8->planar.detect2 = 0;
+		m_pencodingbitsRGB8->planar.detect3 = 0;
+		m_pencodingbitsRGB8->planar.detect4 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+		int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
+		if (iRed2 < 0 || iRed2 > 31)
+		{
+			m_pencodingbitsRGB8->planar.detect1 = 1;
+		}
+		if (iGreen2 < 0 || iGreen2 > 31)
+		{
+			m_pencodingbitsRGB8->planar.detect2 = 1;
+		}
+		if (iBlue2 >= 4)
+		{
+			m_pencodingbitsRGB8->planar.detect3 = 7;
+			m_pencodingbitsRGB8->planar.detect4 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->planar.detect3 = 0;
+			m_pencodingbitsRGB8->planar.detect4 = 1;
+		}
+
+		if (SANITY_CHECK)
+		{
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+			iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
+
+			// make sure red and green don't overflow and blue does
+			assert(iRed2 >= 0 && iRed2 <= 31);
+			assert(iGreen2 >= 0 && iGreen2 <= 31);
+			assert(iBlue2 < 0 || iBlue2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors based on the encoding state for T mode
+	// each selector picks one of four paint colors: color1, color2 + distance, color2 or color2 - distance
+	//
+	void Block4x4Encoding_RGB8::DecodePixels_T(void)
+	{
+		static const unsigned int PAINT_COLORS = 4;
+
+		// the distance is looked up with m_uiCW1; the fourth component of the step color is 0
+		float fStep = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaStep(fStep, fStep, fStep, 0.0f);
+
+		// the paint colors are identical for every pixel, so build them once, indexed by selector
+		ColorFloatRGBA afrgbaPaint[PAINT_COLORS] =
+		{
+			m_frgbaColor1,
+			(m_frgbaColor2 + frgbaStep).ClampRGB(),
+			m_frgbaColor2,
+			(m_frgbaColor2 - frgbaStep).ClampRGB()
+		};
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// a selector outside 0..3 leaves the pixel untouched
+			if (uiSelector < PAINT_COLORS)
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors based on the encoding state for H mode
+	// each selector picks one of four paint colors: color1 + distance, color1 - distance, color2 + distance or color2 - distance
+	//
+	void Block4x4Encoding_RGB8::DecodePixels_H(void)
+	{
+		static const unsigned int PAINT_COLORS = 4;
+
+		// the distance is looked up with m_uiCW1; the fourth component of the step color is 0
+		float fStep = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaStep(fStep, fStep, fStep, 0.0f);
+
+		// the paint colors are identical for every pixel, so build them once, indexed by selector
+		ColorFloatRGBA afrgbaPaint[PAINT_COLORS] =
+		{
+			(m_frgbaColor1 + frgbaStep).ClampRGB(),
+			(m_frgbaColor1 - frgbaStep).ClampRGB(),
+			(m_frgbaColor2 + frgbaStep).ClampRGB(),
+			(m_frgbaColor2 - frgbaStep).ClampRGB()
+		};
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// a selector outside 0..3 leaves the pixel untouched
+			if (uiSelector < PAINT_COLORS)
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the decoded colors (each with alpha 1) based on the encoding state for Planar mode
+	// color1 is the origin; color2 and color3 give the change along the two block axes; the math is done on a 0..255 integer scale
+	void Block4x4Encoding_RGB8::DecodePixels_Planar(void)
+	{
+		// round the origin color to integers on a 0..255 scale
+		const int iOriginR = (int)roundf(m_frgbaColor1.fR * 255.0f);
+		const int iOriginG = (int)roundf(m_frgbaColor1.fG * 255.0f);
+		const int iOriginB = (int)roundf(m_frgbaColor1.fB * 255.0f);
+
+		// change per unit step: color2 drives iX (uiPixel >> 2), color3 drives iY (uiPixel & 3)
+		const int iStepXR = (int)roundf(m_frgbaColor2.fR * 255.0f) - iOriginR;
+		const int iStepXG = (int)roundf(m_frgbaColor2.fG * 255.0f) - iOriginG;
+		const int iStepXB = (int)roundf(m_frgbaColor2.fB * 255.0f) - iOriginB;
+		const int iStepYR = (int)roundf(m_frgbaColor3.fR * 255.0f) - iOriginR;
+		const int iStepYG = (int)roundf(m_frgbaColor3.fG * 255.0f) - iOriginG;
+		const int iStepYB = (int)roundf(m_frgbaColor3.fB * 255.0f) - iOriginB;
+
+		// constant term: 4 * origin, plus 2 so that the shift by 2 below rounds to nearest
+		const int iBaseR = 4 * iOriginR + 2;
+		const int iBaseG = 4 * iOriginG + 2;
+		const int iBaseB = 4 * iOriginB + 2;
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const int iX = (int)(uiPixel >> 2);
+			const int iY = (int)(uiPixel & 3);
+
+			ColorFloatRGBA frgbaPixel;
+			frgbaPixel.fR = (float)((iX * iStepXR + iY * iStepYR + iBaseR) >> 2) / 255.0f;
+			frgbaPixel.fG = (float)((iX * iStepXG + iY * iStepYG + iBaseG) >> 2) / 255.0f;
+			frgbaPixel.fB = (float)((iX * iStepXB + iY * iStepYB + iBaseB) >> 2) / 255.0f;
+			frgbaPixel.fA = 1.0f;
+
+			m_afrgbaDecodedColors[uiPixel] = frgbaPixel.ClampRGB();
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a linear regression per color component (x = pixel index, y = component value) for the a_uiPixels in a_pafrgbaPixels[]
+	// output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset (both are left untouched when a_uiPixels is 0)
+	// a_uiPixels must not exceed MAX_PLANAR_REGRESSION_SIZE, the capacity of the local sample buffers (asserted)
+	void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
+												ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset)
+	{
+		// lets a color be addressed as four consecutive floats so that a component can be selected by index
+		typedef struct { float f[4]; } Float4;
+		assert(a_uiPixels <= MAX_PLANAR_REGRESSION_SIZE);
+		if (a_uiPixels == 0)
+		{
+			return;
+		}
+
+		Float4 *paf4Pixels = (Float4 *)(a_pafrgbaPixels);
+		Float4 *pf4Slope = (Float4 *)(a_pfrgbaSlope);
+		Float4 *pf4Offset = (Float4 *)(a_pfrgbaOffset);
+		float afX[MAX_PLANAR_REGRESSION_SIZE];
+		float afY[MAX_PLANAR_REGRESSION_SIZE];
+
+		// handle r, g and b separately.  don't bother with a
+		for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++)
+			{
+				afX[uiPixel] = (float)uiPixel;
+				afY[uiPixel] = paf4Pixels[uiPixel].f[uiComponent];
+			}
+			// fit once, after every sample is in place, so no unfilled entry of afX/afY is ever read
+			Etc::Regression(afX, afY, a_uiPixels, &(pf4Slope->f[uiComponent]), &(pf4Offset->f[uiComponent]));
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h
new file mode 100644
index 0000000..e405223
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_ETC1.h"
+
+namespace Etc
+{
+	// 4x4 block encoding that adds the T, H and Planar modes on top of the ETC1 encoding it derives from
+	class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1
+	{
+	public:
+
+		Block4x4Encoding_RGB8(void);
+		virtual ~Block4x4Encoding_RGB8(void);
+		// NOTE(review): presumably rebuilds the encoding state from previously written encoding bits - confirm against the implementation
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+
+											ErrorMetric a_errormetric);
+		// NOTE(review): presumably performs one step of the encoding search, bounded by a_fEffort - confirm against the implementation
+		virtual void PerformIteration(float a_fEffort);
+		// writes the current encoding state (ETC1, T, H or Planar mode) to the encoding bits
+		virtual void SetEncodingBits(void);
+		// returns a copy of the third base color (m_frgbaColor3)
+		inline ColorFloatRGBA GetColor3(void) const
+		{
+			return m_frgbaColor3;
+		}
+
+	protected:
+		// MAX_PLANAR_REGRESSION_SIZE sizes the sample buffers in ColorRegression(); TH_DISTANCES sizes s_afTHDistanceTable
+		static const unsigned int PLANAR_CORNER_COLORS = 3;
+		static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4;
+		static const unsigned int TH_DISTANCES = 8;
+		// distance table for the T and H modes, indexed by the distance index m_uiCW1
+		static float s_afTHDistanceTable[TH_DISTANCES];
+
+		void TryPlanar(unsigned int a_uiRadius);
+		void TryTAndH(unsigned int a_uiRadius);
+
+		void InitFromEncodingBits_Planar(void);
+
+		ColorFloatRGBA	m_frgbaColor3;		// used for planar
+		// writers for the mode-specific layouts of the encoding bits
+		void SetEncodingBits_T(void);
+		void SetEncodingBits_H(void);
+		void SetEncodingBits_Planar(void);
+
+		// state shared between iterations
+		ColorFloatRGBA	m_frgbaOriginalColor1_TAndH;
+		ColorFloatRGBA	m_frgbaOriginalColor2_TAndH;
+
+		void CalculateBaseColorsForTAndH(void);
+		void TryT(unsigned int a_uiRadius);
+		void TryT_BestSelectorCombination(void);
+		void TryH(unsigned int a_uiRadius);
+		void TryH_BestSelectorCombination(void);
+
+	private:
+
+		void InitFromEncodingBits_T(void);
+		void InitFromEncodingBits_H(void);
+
+		void CalculatePlanarCornerColors(void);
+		// per-component linear regression over a_uiPixels colors; outputs a slope color and an offset color
+		void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels,
+			ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset);
+
+		bool TwiddlePlanar(void);
+		bool TwiddlePlanarR();
+		bool TwiddlePlanarG();
+		bool TwiddlePlanarB();
+		// decoders: fill m_afrgbaDecodedColors from the current encoding state for the given mode
+		void DecodePixels_T(void);
+		void DecodePixels_H(void);
+		void DecodePixels_Planar(void);
+
+	};
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp
new file mode 100644
index 0000000..fc0678d
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.cpp
@@ -0,0 +1,1819 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RGB8A1.cpp contains:
+	Block4x4Encoding_RGB8A1
+	Block4x4Encoding_RGB8A1_Opaque
+	Block4x4Encoding_RGB8A1_Transparent
+
+These encoders are used when targeting file format RGB8A1.
+
+Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque
+Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent
+Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RGB8A1.h"
+
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+	
+	// ####################################################################################################
+	// Block4x4Encoding_RGB8A1
+	// ####################################################################################################
+
+	float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] =
+	{	// color modifiers read by Decode_ETC1() when m_boolOpaque is false, indexed [codeword][selector]; every entry is a value divided by 255
+		{ 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f },
+		{ 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f },
+		{ 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f },
+		{ 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f },
+		{ 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f },
+		{ 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f },
+		{ 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f },
+		{ 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct with no encoding bits attached; the block starts as neither opaque nor transparent, with punch-through pixels flagged
+	Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void)
+	{
+		m_boolPunchThroughPixels = true;
+		m_boolTransparent = false;
+		m_boolOpaque = false;
+		m_pencodingbitsRGB8 = nullptr;
+	}
+	// the destructor body is empty
+	Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding; it also supplies the source alpha mix
+	//   and whether the block has punch-through pixels
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	// a_errormetric is used to choose the best encoding
+	//
+	void Block4x4Encoding_RGB8A1::InitFromSource(Block4x4 *a_pblockParent,
+													ColorFloatRGBA *a_pafrgbaSource,
+													unsigned char *a_paucEncodingBits,
+													ErrorMetric a_errormetric)
+	{
+
+		// the common setup is delegated to Block4x4Encoding_RGB8
+		Block4x4Encoding_RGB8::InitFromSource(a_pblockParent,
+			a_pafrgbaSource,
+			a_paucEncodingBits,
+			a_errormetric);
+
+		// record the parent's view of the source alpha: all opaque, all transparent, or neither
+		const bool boolAllOpaque = (a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE);
+		const bool boolAllTransparent = (a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT);
+		m_boolOpaque = boolAllOpaque;
+		m_boolTransparent = boolAllTransparent;
+		m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels();
+
+		// decoded alphas are either 1 or 0: a source alpha of 0.5 or more becomes 1, anything else becomes 0
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const float fSourceAlpha = m_pafrgbaSource[uiPixel].fA;
+			m_afDecodedAlphas[uiPixel] = (fSourceAlpha >= 0.5f) ? 1.0f : 0.0f;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// the mode (ETC1, T, H or Planar) is detected from the bits themselves
+	// a_pblockParent points to the block associated with this encoding
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_errormetric is used to choose the best encoding
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+
+		// decode as an ETC1 mode first; the result is overwritten below if a T, H or Planar mode is detected
+		InitFromEncodingBits_ETC1(a_pblockParent,
+			a_paucEncodingBits,
+			a_pafrgbaSource,
+			a_errormetric);
+
+		// keep a typed view of the encoding bits for the mode checks below
+		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
+
+		// second color of each channel as the differential layout computes it (base + delta)
+		// NOTE(review): dred2/dgreen2/dblue2 are presumably signed deltas, otherwise a sum could never be negative
+		const int iRedSum = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		const int iGreenSum = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+		const int iBlueSum = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2;
+
+		// a channel whose sum falls outside 0..31 selects the mode; red is checked first, then green, then blue
+		const bool boolRedOutOfRange = (iRedSum < 0 || iRedSum > 31);
+		const bool boolGreenOutOfRange = (iGreenSum < 0 || iGreenSum > 31);
+		const bool boolBlueOutOfRange = (iBlueSum < 0 || iBlueSum > 31);
+
+		if (boolRedOutOfRange)
+		{
+			InitFromEncodingBits_T();
+		}
+		else if (boolGreenOutOfRange)
+		{
+			InitFromEncodingBits_H();
+		}
+		else if (boolBlueOutOfRange)
+		{
+			Block4x4Encoding_RGB8::InitFromEncodingBits_Planar();
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode.
+	// if it isn't an ETC1 mode, this will be overwritten later
+	//
+	// sets the mode to MODE_ETC1, rebuilds both base colors, the codewords and the selectors from the bits,
+	// then decodes the pixels and computes the block error
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
+		unsigned char *a_paucEncodingBits,
+		ColorFloatRGBA *a_pafrgbaSource,
+		ErrorMetric a_errormetric)
+	{
+
+		// base class setup with the parent block, the source pixels and the error metric
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource,
+			a_errormetric);
+
+		// typed view of the raw encoding bits
+		m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits;
+
+		// the block is always treated as differential here;
+		// the bit in the "diff" position of the layout is read as the opaque flag instead
+		m_mode = MODE_ETC1;
+		m_boolDiff = true;
+		m_boolFlip = m_pencodingbitsRGB8->differential.flip;
+		m_boolOpaque = m_pencodingbitsRGB8->differential.diff;
+
+		// limits a reconstructed channel value to the range 0..31
+		auto clampChannel = [](int a_iChannel) -> int
+		{
+			if (a_iChannel < 0)
+			{
+				return 0;
+			}
+			return (a_iChannel > 31) ? 31 : a_iChannel;
+		};
+
+		// second base color: first base color plus the stored delta, clamped per channel
+		const int iR2 = clampChannel(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2);
+		const int iG2 = clampChannel(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2);
+		const int iB2 = clampChannel(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2);
+
+		// convert both base colors with ConvertFromRGB5()
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1,
+			m_pencodingbitsRGB8->differential.green1,
+			m_pencodingbitsRGB8->differential.blue1);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2,
+			(unsigned char)iG2,
+			(unsigned char)iB2);
+
+		// one codeword per half of the block
+		m_uiCW1 = m_pencodingbitsRGB8->differential.cw1;
+		m_uiCW2 = m_pencodingbitsRGB8->differential.cw2;
+
+		// the selectors are read by the ETC1 base class
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+
+		// produce the decoded pixels for this interpretation, then measure its error
+		Decode_ETC1();
+
+		CalcBlockError();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if T mode is detected
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void)
+	{
+		m_mode = MODE_T;
+
+		// red of color 1 is stored in two pieces (red1a above red1b); every other channel is stored whole
+		const unsigned int uiRed1High = m_pencodingbitsRGB8->t.red1a;
+		const unsigned int uiRed1Low = m_pencodingbitsRGB8->t.red1b;
+		const unsigned char ucRed1 = (unsigned char)((uiRed1High << 2) + uiRed1Low);
+
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1,
+			(unsigned char)m_pencodingbitsRGB8->t.green1,
+			(unsigned char)m_pencodingbitsRGB8->t.blue1);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)m_pencodingbitsRGB8->t.red2,
+			(unsigned char)m_pencodingbitsRGB8->t.green2,
+			(unsigned char)m_pencodingbitsRGB8->t.blue2);
+
+		// the distance index is stored in two pieces as well (da above db)
+		m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db;
+
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+
+		DecodePixels_T();
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding if H mode is detected
+	//
+	void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void)
+	{
+		m_mode = MODE_H;
+
+		// reassemble the channels that are stored in several pieces (green1, blue1 and green2)
+		const unsigned char ucR1 = m_pencodingbitsRGB8->h.red1;
+		const unsigned char ucG1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) +
+									m_pencodingbitsRGB8->h.green1b);
+		const unsigned char ucB1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) +
+									(m_pencodingbitsRGB8->h.blue1b << 1) +
+									m_pencodingbitsRGB8->h.blue1c);
+
+		const unsigned char ucR2 = m_pencodingbitsRGB8->h.red2;
+		const unsigned char ucG2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) +
+									m_pencodingbitsRGB8->h.green2b);
+		const unsigned char ucB2 = m_pencodingbitsRGB8->h.blue2;
+
+		m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucR1, ucG1, ucB1);
+		m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucR2, ucG2, ucB2);
+
+		// the lsb of the distance index is not stored; it is 1 when color 1, packed as an integer, is >= color 2
+		const unsigned int uiPacked1 = ((unsigned int)ucR1 << 16) + ((unsigned int)ucG1 << 8) + (unsigned int)ucB1;
+		const unsigned int uiPacked2 = ((unsigned int)ucR2 << 16) + ((unsigned int)ucG2 << 8) + (unsigned int)ucB2;
+		const unsigned int uiDistanceLsb = (uiPacked1 >= uiPacked2) ? 1 : 0;
+
+		m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1) + uiDistanceLsb;
+
+		// the selectors are read by the ETC1 base class
+		Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors();
+
+		// produce the decoded pixels for H mode, then measure the error
+		DecodePixels_H();
+
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state
+	//
+	void Block4x4Encoding_RGB8A1::Decode_ETC1(void)
+	{
+
+		// the visiting order depends on the flip bit; the first 8 pixels in that order use color1/CW1, the rest color2/CW2
+		const unsigned int *pauiOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0;
+
+		for (unsigned int uiIndex = 0; uiIndex < PIXELS; uiIndex++)
+		{
+			const bool boolFirstHalf = (uiIndex < 8);
+			const unsigned int uiPixel = pauiOrder[uiIndex];
+			const unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			// in a non-opaque block the transparent selector resets the color and gives alpha 0
+			if (!m_boolOpaque && uiSelector == TRANSPARENT_SELECTOR)
+			{
+				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
+				m_afDecodedAlphas[uiPixel] = 0.0f;
+				continue;
+			}
+
+			// every other pixel is its half's base color plus a modifier; the modifier table depends on the opaque flag
+			const unsigned int uiCW = boolFirstHalf ? m_uiCW1 : m_uiCW2;
+			const float fDelta = m_boolOpaque ? Block4x4Encoding_ETC1::s_aafCwTable[uiCW][uiSelector]
+											: s_aafCwOpaqueUnsetTable[uiCW][uiSelector];
+
+			ColorFloatRGBA &frgbaCenter = boolFirstHalf ? m_frgbaColor1 : m_frgbaColor2;
+			m_afrgbaDecodedColors[uiPixel] = (frgbaCenter + fDelta).ClampRGB();
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// for T mode, set the decoded colors and decoded alpha based on the encoding state
+	// selector 2 means "transparent" unless the block is opaque; all other pixels get alpha 1
+	//
+	void Block4x4Encoding_RGB8A1::DecodePixels_T(void)
+	{
+		static const unsigned int PAINT_COLORS = 4;
+		static const unsigned int PUNCH_THROUGH_SELECTOR = 2;
+
+		float fStep = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaStep(fStep, fStep, fStep, 0.0f);
+
+		// paint colors by selector: color1, color2 + distance, color2, color2 - distance
+		ColorFloatRGBA afrgbaPaint[PAINT_COLORS] =
+		{
+			m_frgbaColor1,
+			(m_frgbaColor2 + frgbaStep).ClampRGB(),
+			m_frgbaColor2,
+			(m_frgbaColor2 - frgbaStep).ClampRGB()
+		};
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			if (uiSelector >= PAINT_COLORS)
+			{
+				// a selector outside 0..3 leaves the pixel untouched
+				continue;
+			}
+
+			if (uiSelector == PUNCH_THROUGH_SELECTOR && m_boolOpaque == false)
+			{
+				// transparent pixel: default-constructed color, alpha 0
+				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
+				m_afDecodedAlphas[uiPixel] = 0.0f;
+			}
+			else
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+				m_afDecodedAlphas[uiPixel] = 1.0f;
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// for H mode, set the decoded colors and decoded alpha based on the encoding state
+	// selector 2 means "transparent" unless the block is opaque; all other pixels get alpha 1
+	//
+	void Block4x4Encoding_RGB8A1::DecodePixels_H(void)
+	{
+		static const unsigned int PAINT_COLORS = 4;
+		static const unsigned int PUNCH_THROUGH_SELECTOR = 2;
+
+		float fStep = s_afTHDistanceTable[m_uiCW1];
+		ColorFloatRGBA frgbaStep(fStep, fStep, fStep, 0.0f);
+
+		// paint colors by selector: color1 + distance, color1 - distance, color2 + distance, color2 - distance
+		ColorFloatRGBA afrgbaPaint[PAINT_COLORS] =
+		{
+			(m_frgbaColor1 + frgbaStep).ClampRGB(),
+			(m_frgbaColor1 - frgbaStep).ClampRGB(),
+			(m_frgbaColor2 + frgbaStep).ClampRGB(),
+			(m_frgbaColor2 - frgbaStep).ClampRGB()
+		};
+
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiSelector = m_auiSelectors[uiPixel];
+
+			if (uiSelector >= PAINT_COLORS)
+			{
+				// a selector outside 0..3 leaves the pixel untouched
+				continue;
+			}
+
+			if (uiSelector == PUNCH_THROUGH_SELECTOR && m_boolOpaque == false)
+			{
+				// transparent pixel: default-constructed color, alpha 0
+				m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA();
+				m_afDecodedAlphas[uiPixel] = 0.0f;
+			}
+			else
+			{
+				m_afrgbaDecodedColors[uiPixel] = afrgbaPaint[uiSelector];
+				m_afDecodedAlphas[uiPixel] = 1.0f;
+			}
+		}
+
+	}
+
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	// RGB8A1 can't use individual mode
+	// RGB8A1 with transparent pixels can't use planar mode
+	//
+	void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolOpaque);
+		assert(!m_boolTransparent);
+		assert(!m_boolDone);
+
+		// becomes true when the step just taken is the last one that a_fEffort allows
+		bool boolOutOfEffort = false;
+
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			// establish the initial encoding
+			PerformFirstIteration();
+			break;
+
+		case 1:
+			// differential search with the most likely flip, radius 1
+			TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			// the same search with the other flip
+			TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			boolOutOfEffort = (a_fEffort <= 39.5f);
+			break;
+
+		case 3:
+			// T and H modes share their base colors
+			Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH();
+			TryT(1);
+			TryH(1);
+			boolOutOfEffort = (a_fEffort <= 49.5f);
+			break;
+
+		// cases 4..7: the four TryDegenerates steps, each one requiring a higher effort than the last
+		case 4:
+			TryDegenerates1();
+			boolOutOfEffort = (a_fEffort <= 59.5f);
+			break;
+
+		case 5:
+			TryDegenerates2();
+			boolOutOfEffort = (a_fEffort <= 69.5f);
+			break;
+
+		case 6:
+			TryDegenerates3();
+			boolOutOfEffort = (a_fEffort <= 79.5f);
+			break;
+
+		case 7:
+			TryDegenerates4();
+			boolOutOfEffort = true;
+			break;
+
+		default:
+			// there is no step after the last one
+			assert(0);
+			break;
+		}
+
+		if (boolOutOfEffort)
+		{
+			m_boolDone = true;
+		}
+
+		m_uiEncodingIterations++;
+
+		// a perfect encoding also ends the search
+		SetDoneIfPerfect();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best initial encoding to ensure block has a valid encoding
+	//
+	void Block4x4Encoding_RGB8A1::PerformFirstIteration(void)
+	{
+		Block4x4Encoding_ETC1::CalculateMostLikelyFlip();
+
+		// start from the largest float so that any try with a smaller error replaces it
+		m_fError = FLT_MAX;
+
+		// radius 0 search on the most likely flip; the other flip is only tried if not yet done
+		TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
+		SetDoneIfPerfect();
+		if (!m_boolDone)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
+			SetDoneIfPerfect();
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// mostly copied from ETC1
+	// differences:
+	//		Block4x4Encoding_RGB8A1 encodingTry = *this;
+	//
+	void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, 
+													int a_iGrayOffset1, int a_iGrayOffset2)
+	{
+		// a_boolFlip: true splits the block into top/bottom halves, false into left/right halves
+		ColorFloatRGBA frgbaColor1;
+		ColorFloatRGBA frgbaColor2;
+
+		const unsigned int *pauiPixelMapping1;
+		const unsigned int *pauiPixelMapping2;
+
+		if (a_boolFlip)
+		{
+			frgbaColor1 = m_frgbaSourceAverageTop;
+			frgbaColor2 = m_frgbaSourceAverageBottom;
+
+			pauiPixelMapping1 = s_auiTopPixelMapping;
+			pauiPixelMapping2 = s_auiBottomPixelMapping;
+		}
+		else
+		{
+			frgbaColor1 = m_frgbaSourceAverageLeft;
+			frgbaColor2 = m_frgbaSourceAverageRight;
+
+			pauiPixelMapping1 = s_auiLeftPixelMapping;
+			pauiPixelMapping2 = s_auiRightPixelMapping;
+		}
+		// a_uiRadius and the two gray offsets are search parameters consumed by DifferentialTrys
+		DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, 
+								a_uiRadius, a_iGrayOffset1, a_iGrayOffset2);
+		// search on a copy so the current encoding is only replaced when the try is better
+		Block4x4Encoding_RGB8A1 encodingTry = *this;
+		encodingTry.m_boolFlip = a_boolFlip;
+
+		encodingTry.TryDifferentialHalf(&trys.m_half1);
+		encodingTry.TryDifferentialHalf(&trys.m_half2);
+
+		// find best halves that are within differential range
+		DifferentialTrys::Try *ptryBest1 = nullptr;
+		DifferentialTrys::Try *ptryBest2 = nullptr;
+		encodingTry.m_fError = FLT_MAX;
+
+		// see if the best of each half are in differential range
+		int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed;
+		int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen;
+		int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue;
+		if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3)
+		{
+			ptryBest1 = trys.m_half1.m_ptryBest;
+			ptryBest2 = trys.m_half2.m_ptryBest;
+			encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError;
+		}
+		else
+		{
+			// else, find the next best halves that are in differential range
+			for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0];
+			ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys];
+				ptry1++)
+			{
+				for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0];
+				ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys];
+					ptry2++)
+				{
+					iDRed = ptry2->m_iRed - ptry1->m_iRed;
+					bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4;
+					iDGreen = ptry2->m_iGreen - ptry1->m_iGreen;
+					bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4;
+					iDBlue = ptry2->m_iBlue - ptry1->m_iBlue;
+					bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4;
+
+					if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta)
+					{
+						float fError = ptry1->m_fError + ptry2->m_fError;
+
+						if (fError < encodingTry.m_fError)
+						{
+							encodingTry.m_fError = fError;
+
+							ptryBest1 = ptry1;
+							ptryBest2 = ptry2;
+						}
+					}
+
+				}
+			}
+			assert(encodingTry.m_fError < FLT_MAX);
+			assert(ptryBest1 != nullptr);
+			assert(ptryBest2 != nullptr);
+		}
+
+		if (encodingTry.m_fError < m_fError)
+		{
+			m_mode = MODE_ETC1;
+			m_boolDiff = true;
+			m_boolFlip = encodingTry.m_boolFlip;
+			m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue);
+			m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue);
+			m_uiCW1 = ptryBest1->m_uiCW;
+			m_uiCW2 = ptryBest2->m_uiCW;
+
+			// rebuild the selectors and decoded pixels from the winning try of each half
+			for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++)
+			{
+				unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder];
+				unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder];
+
+				unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder];
+				unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder];
+
+				m_auiSelectors[uiPixel1] = uiSelector1;
+				m_auiSelectors[uiPixel2] = uiSelector2;
+
+				if (uiSelector1 == TRANSPARENT_SELECTOR)
+				{
+					m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA();
+					m_afDecodedAlphas[uiPixel1] = 0.0f;
+				}
+				else
+				{
+					float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1];
+					m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB();
+					m_afDecodedAlphas[uiPixel1] = 1.0f;
+				}
+
+				if (uiSelector2 == TRANSPARENT_SELECTOR)
+				{
+					m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA();
+					m_afDecodedAlphas[uiPixel2] = 0.0f;
+				}
+				else
+				{
+					float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2];
+					m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB();
+					m_afDecodedAlphas[uiPixel2] = 1.0f;
+				}
+
+				// no error is accumulated per pixel: the block error assigned after
+				// this loop is the sum of the two half errors that TryDifferentialHalf
+				// already computed, so a running total kept here would simply be
+				// overwritten
+			}
+
+			m_fError1 = ptryBest1->m_fError;
+			m_fError2 = ptryBest2->m_fError;
+			m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors;
+			m_fError = m_fError1 + m_fError2;
+
+			// sanity check
+			{
+				int iRed1 = m_frgbaColor1.IntRed(31.0f);
+				int iGreen1 = m_frgbaColor1.IntGreen(31.0f);
+				int iBlue1 = m_frgbaColor1.IntBlue(31.0f);
+
+				int iRed2 = m_frgbaColor2.IntRed(31.0f);
+				int iGreen2 = m_frgbaColor2.IntGreen(31.0f);
+				int iBlue2 = m_frgbaColor2.IntBlue(31.0f);
+
+				iDRed = iRed2 - iRed1;
+				iDGreen = iGreen2 - iGreen1;
+				iDBlue = iBlue2 - iBlue1;
+
+				assert(iDRed >= -4 && iDRed < 4);
+				assert(iDGreen >= -4 && iDGreen < 4);
+				assert(iDBlue >= -4 && iDBlue < 4);
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// mostly copied from ETC1
+	// differences:
+	//		uses s_aafCwOpaqueUnsetTable
+	//		color for selector set to 0,0,0,0
+	//
+	void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf)
+	{
+		// fills a_phalf->m_atry with one Try per base color within m_uiRadius of (m_iRed, m_iGreen, m_iBlue) and points m_ptryBest at the lowest-error one
+		a_phalf->m_ptryBest = nullptr;
+		float fBestTryError = FLT_MAX;
+
+		a_phalf->m_uiTrys = 0;
+		for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius;
+		iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius;
+			iRed++)
+		{
+			assert(iRed >= 0 && iRed <= 31);
+
+			for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius;
+			iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius;
+				iGreen++)
+			{
+				assert(iGreen >= 0 && iGreen <= 31);
+
+				for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius;
+				iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius;
+					iBlue++)
+				{
+					assert(iBlue >= 0 && iBlue <= 31);
+
+					DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys];
+					assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]);
+					// record the candidate base color; its error starts at FLT_MAX until a code word is scored
+					ptry->m_iRed = iRed;
+					ptry->m_iGreen = iGreen;
+					ptry->m_iBlue = iBlue;
+					ptry->m_fError = FLT_MAX;
+					ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue);
+
+					// try each CW
+					for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++)
+					{
+						unsigned int auiPixelSelectors[PIXELS / 2];
+						ColorFloatRGBA	afrgbaDecodedColors[PIXELS / 2];	// written below but never read in this function
+						float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+							FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+
+						// pre-compute decoded pixels for each selector
+						ColorFloatRGBA afrgbaSelectors[SELECTORS];
+						assert(SELECTORS == 4);
+						afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB();
+						afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB();
+						afrgbaSelectors[2] = ColorFloatRGBA();
+						afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB();
+
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[a_phalf->m_pauiPixelMapping[uiPixel]];
+							ColorFloatRGBA frgbaDecodedPixel;
+							// a source pixel with alpha < 0.5 is forced to TRANSPARENT_SELECTOR; any other pixel skips that selector
+							for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++)
+							{
+								if (pfrgbaSourcePixel->fA < 0.5f)
+								{
+									uiSelector = TRANSPARENT_SELECTOR;
+								}
+								else if (uiSelector == TRANSPARENT_SELECTOR)
+								{
+									continue;
+								}
+
+								frgbaDecodedPixel = afrgbaSelectors[uiSelector];
+
+								float fPixelError;
+								// the error is measured with the decoded alpha already stored for this pixel
+								fPixelError = CalcPixelError(frgbaDecodedPixel, m_afDecodedAlphas[a_phalf->m_pauiPixelMapping[uiPixel]],
+																	*pfrgbaSourcePixel);
+
+								if (fPixelError < afPixelErrors[uiPixel])
+								{
+									auiPixelSelectors[uiPixel] = uiSelector;
+									afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel;
+									afPixelErrors[uiPixel] = fPixelError;
+								}
+								// the forced transparent selector is the only candidate for its pixel, so stop scanning
+								if (uiSelector == TRANSPARENT_SELECTOR)
+								{
+									break;
+								}
+							}
+						}
+
+						// add up all pixel errors
+						float fCWError = 0.0f;
+						for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+						{
+							fCWError += afPixelErrors[uiPixel];
+						}
+
+						// if best CW so far
+						if (fCWError < ptry->m_fError)
+						{
+							ptry->m_uiCW = uiCW;
+							for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++)
+							{
+								ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel];
+							}
+							ptry->m_fError = fCWError;
+						}
+
+					}
+					// remember the best base color seen so far for this half
+					if (ptry->m_fError < fBestTryError)
+					{
+						a_phalf->m_ptryBest = ptry;
+						fBestTryError = ptry->m_fError;
+					}
+
+					assert(ptry->m_fError < FLT_MAX);
+
+					a_phalf->m_uiTrys++;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in T mode
+	// save this encoding if it improves the error
+	//
+	// since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently
+	// better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower
+	//
+	void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius)
+	{
+		Block4x4Encoding_RGB8A1 encodingTry = *this;
+		// a_uiRadius: how far, in 4-bit steps, each channel of a base color may move from its original value
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_T;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;
+		}
+		// search window for base color1, clamped to the 4-bit channel range [0, 15]
+		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
+		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+
+		int iMinRed1 = iColor1Red - (int)a_uiRadius;
+		if (iMinRed1 < 0)
+		{
+			iMinRed1 = 0;
+		}
+		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
+		if (iMaxRed1 > 15)
+		{
+			iMaxRed1 = 15;	// clamp the upper bound itself; the lower bound stays as computed
+		}
+
+		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
+		if (iMinGreen1 < 0)
+		{
+			iMinGreen1 = 0;
+		}
+		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
+		if (iMaxGreen1 > 15)
+		{
+			iMaxGreen1 = 15;
+		}
+
+		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
+		if (iMinBlue1 < 0)
+		{
+			iMinBlue1 = 0;
+		}
+		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
+		if (iMaxBlue1 > 15)
+		{
+			iMaxBlue1 = 15;
+		}
+		// search window for base color2, clamped the same way
+		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
+		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		int iMinRed2 = iColor2Red - (int)a_uiRadius;
+		if (iMinRed2 < 0)
+		{
+			iMinRed2 = 0;
+		}
+		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
+		if (iMaxRed2 > 15)
+		{
+			iMaxRed2 = 15;
+		}
+
+		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
+		if (iMinGreen2 < 0)
+		{
+			iMinGreen2 = 0;
+		}
+		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
+		if (iMaxGreen2 > 15)
+		{
+			iMaxGreen2 = 15;
+		}
+
+		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
+		if (iMinBlue2 < 0)
+		{
+			iMinBlue2 = 0;
+		}
+		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
+		if (iMaxBlue2 > 15)
+		{
+			iMaxBlue2 = 15;
+		}
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
+		{
+			encodingTry.m_uiCW1 = uiDistance;
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			// twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector
+			//
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
+						{
+							if (uiBaseColorSwaps == 0)
+							{
+								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
+								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+							}
+							else
+							{
+								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH;
+							}
+
+							encodingTry.TryT_BestSelectorCombination();
+							// adopt the try only when it beats the current encoding
+							if (encodingTry.m_fError < m_fError)
+							{
+								m_mode = encodingTry.m_mode;
+								m_boolDiff = encodingTry.m_boolDiff;
+								m_boolFlip = encodingTry.m_boolFlip;
+
+								m_frgbaColor1 = encodingTry.m_frgbaColor1;
+								m_frgbaColor2 = encodingTry.m_frgbaColor2;
+								m_uiCW1 = encodingTry.m_uiCW1;
+
+								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+								{
+									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+								}
+
+								m_fError = encodingTry.m_fError;
+							}
+						}
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++)
+						{
+							if (uiBaseColorSwaps == 0)
+							{
+								encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+								encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
+							}
+							else
+							{
+								encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH;
+								encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+							}
+
+							encodingTry.TryT_BestSelectorCombination();
+							// adopt the try only when it beats the current encoding
+							if (encodingTry.m_fError < m_fError)
+							{
+								m_mode = encodingTry.m_mode;
+								m_boolDiff = encodingTry.m_boolDiff;
+								m_boolFlip = encodingTry.m_boolFlip;
+
+								m_frgbaColor1 = encodingTry.m_frgbaColor1;
+								m_frgbaColor2 = encodingTry.m_frgbaColor2;
+								m_uiCW1 = encodingTry.m_uiCW1;
+
+								for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+								{
+									m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+									m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+								}
+
+								m_fError = encodingTry.m_fError;
+							}
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryT
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void)
+	{
+		// scores m_frgbaColor1 / m_frgbaColor2 / m_uiCW1 as set by the caller; state is only updated if m_fError improves
+		float fDistance = s_afTHDistanceTable[m_uiCW1];
+		unsigned int auiBestPixelSelectors[PIXELS];
+		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+			FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
+		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
+
+		assert(SELECTORS == 4);
+		afrgbaDecodedPixel[0] = m_frgbaColor1;
+		afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB();
+		afrgbaDecodedPixel[2] = ColorFloatRGBA();
+		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
+
+		// try each selector
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiMinSelector = 0;
+			unsigned int uiMaxSelector = SELECTORS - 1;
+			const bool boolTransparentPixel = (m_pafrgbaSource[uiPixel].fA < 0.5f);
+			if (boolTransparentPixel)
+			{
+				uiMinSelector = 2;
+				uiMaxSelector = 2;
+			}
+
+			for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++)
+			{
+				// other pixels must skip the transparent selector (as TryDifferentialHalf does): it is scored with m_afDecodedAlphas, not the alpha 0 it decodes to
+				if (!boolTransparentPixel && uiSelector == TRANSPARENT_SELECTOR)
+				{
+					continue;
+				}
+				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
+													m_pafrgbaSource[uiPixel]);
+				if (fPixelError < afBestPixelErrors[uiPixel])
+				{
+					afBestPixelErrors[uiPixel] = fPixelError;
+					auiBestPixelSelectors[uiPixel] = uiSelector;
+					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
+				}
+			}
+		}
+
+		// add up all of the pixel errors
+		float fBlockError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			fBlockError += afBestPixelErrors[uiPixel];
+		}
+
+		if (fBlockError < m_fError)
+		{
+			m_fError = fBlockError;
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
+				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try encoding in H mode
+	// save this encoding if it improves the error
+	//
+	// since all pixels use the distance table, color1 and color2 can NOT be twiddled independently
+	// TWIDDLE_RADIUS of 2 is WAY too slow
+	//
+	void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius)
+	{
+		Block4x4Encoding_RGB8A1 encodingTry = *this;
+		// a_uiRadius: how far, in 4-bit steps, each channel of a base color may move from its original value
+		// init "try"
+		{
+			encodingTry.m_mode = MODE_H;
+			encodingTry.m_boolDiff = true;
+			encodingTry.m_boolFlip = false;
+			encodingTry.m_fError = FLT_MAX;
+		}
+		// search window for base color1, clamped to the 4-bit channel range [0, 15]
+		int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f);
+		int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f);
+		int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f);
+
+		int iMinRed1 = iColor1Red - (int)a_uiRadius;
+		if (iMinRed1 < 0)
+		{
+			iMinRed1 = 0;
+		}
+		int iMaxRed1 = iColor1Red + (int)a_uiRadius;
+		if (iMaxRed1 > 15)
+		{
+			iMaxRed1 = 15;	// clamp the upper bound itself; the lower bound stays as computed
+		}
+
+		int iMinGreen1 = iColor1Green - (int)a_uiRadius;
+		if (iMinGreen1 < 0)
+		{
+			iMinGreen1 = 0;
+		}
+		int iMaxGreen1 = iColor1Green + (int)a_uiRadius;
+		if (iMaxGreen1 > 15)
+		{
+			iMaxGreen1 = 15;
+		}
+
+		int iMinBlue1 = iColor1Blue - (int)a_uiRadius;
+		if (iMinBlue1 < 0)
+		{
+			iMinBlue1 = 0;
+		}
+		int iMaxBlue1 = iColor1Blue + (int)a_uiRadius;
+		if (iMaxBlue1 > 15)
+		{
+			iMaxBlue1 = 15;
+		}
+		// search window for base color2, clamped the same way
+		int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f);
+		int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f);
+		int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f);
+
+		int iMinRed2 = iColor2Red - (int)a_uiRadius;
+		if (iMinRed2 < 0)
+		{
+			iMinRed2 = 0;
+		}
+		int iMaxRed2 = iColor2Red + (int)a_uiRadius;
+		if (iMaxRed2 > 15)
+		{
+			iMaxRed2 = 15;
+		}
+
+		int iMinGreen2 = iColor2Green - (int)a_uiRadius;
+		if (iMinGreen2 < 0)
+		{
+			iMinGreen2 = 0;
+		}
+		int iMaxGreen2 = iColor2Green + (int)a_uiRadius;
+		if (iMaxGreen2 > 15)
+		{
+			iMaxGreen2 = 15;
+		}
+
+		int iMinBlue2 = iColor2Blue - (int)a_uiRadius;
+		if (iMinBlue2 < 0)
+		{
+			iMinBlue2 = 0;
+		}
+		int iMaxBlue2 = iColor2Blue + (int)a_uiRadius;
+		if (iMaxBlue2 > 15)
+		{
+			iMaxBlue2 = 15;
+		}
+
+		for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++)
+		{
+			encodingTry.m_uiCW1 = uiDistance;
+
+			// twiddle m_frgbaOriginalColor1_TAndH
+			for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++)
+			{
+				for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++)
+				{
+					for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++)
+					{
+						encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1);
+						encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH;
+
+						// if color1 == color2, H encoding issues can pop up, so abort
+						if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue)
+						{
+							continue;
+						}
+
+						encodingTry.TryH_BestSelectorCombination();
+						// adopt the try only when it beats the current encoding
+						if (encodingTry.m_fError < m_fError)
+						{
+							m_mode = encodingTry.m_mode;
+							m_boolDiff = encodingTry.m_boolDiff;
+							m_boolFlip = encodingTry.m_boolFlip;
+
+							m_frgbaColor1 = encodingTry.m_frgbaColor1;
+							m_frgbaColor2 = encodingTry.m_frgbaColor2;
+							m_uiCW1 = encodingTry.m_uiCW1;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+							}
+
+							m_fError = encodingTry.m_fError;
+						}
+					}
+				}
+			}
+
+			// twiddle m_frgbaOriginalColor2_TAndH
+			for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++)
+			{
+				for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++)
+				{
+					for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++)
+					{
+						encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH;
+						encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2);
+
+						// if color1 == color2, H encoding issues can pop up, so abort
+						if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue)
+						{
+							continue;
+						}
+
+						encodingTry.TryH_BestSelectorCombination();
+						// adopt the try only when it beats the current encoding
+						if (encodingTry.m_fError < m_fError)
+						{
+							m_mode = encodingTry.m_mode;
+							m_boolDiff = encodingTry.m_boolDiff;
+							m_boolFlip = encodingTry.m_boolFlip;
+
+							m_frgbaColor1 = encodingTry.m_frgbaColor1;
+							m_frgbaColor2 = encodingTry.m_frgbaColor2;
+							m_uiCW1 = encodingTry.m_uiCW1;
+
+							for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+							{
+								m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel];
+								m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel];
+							}
+
+							m_fError = encodingTry.m_fError;
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best selector combination for TryH
+	// called on an encodingTry
+	//
+	void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void)
+	{
+		// scores m_frgbaColor1 / m_frgbaColor2 / m_uiCW1 as set by the caller; state is only updated if m_fError improves
+		// abort if colors and CW will pose an encoding problem
+		{
+			unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f);
+			unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f);
+			unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f);
+			unsigned int uiColorValue1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
+
+			unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f);
+			unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f);
+			unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f);
+			unsigned int uiColorValue2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
+
+			unsigned int uiCWLsb = m_uiCW1 & 1;
+			// NOTE(review): (uiColorValue2 & uiCWLsb) is only ever 0 or 1, so this fires only when color1 is 0 and both low bits are 1;
+			// it may have been meant to compare color1 against color2 itself - confirm against SetEncodingBits_H before changing
+			if (uiColorValue1 < (uiColorValue2 & uiCWLsb))
+			{
+				return;
+			}
+		}
+
+		float fDistance = s_afTHDistanceTable[m_uiCW1];
+
+		unsigned int auiBestPixelSelectors[PIXELS];
+		float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
+											FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX };
+		ColorFloatRGBA	afrgbaBestDecodedPixels[PIXELS];
+		ColorFloatRGBA afrgbaDecodedPixel[SELECTORS];
+
+		assert(SELECTORS == 4);
+		afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB();
+		afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB();
+		afrgbaDecodedPixel[2] = ColorFloatRGBA();
+		afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB();
+
+		// try each selector
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			unsigned int uiMinSelector = 0;
+			unsigned int uiMaxSelector = SELECTORS - 1;
+			const bool boolTransparentPixel = (m_pafrgbaSource[uiPixel].fA < 0.5f);
+			if (boolTransparentPixel)
+			{
+				uiMinSelector = 2;
+				uiMaxSelector = 2;
+			}
+
+			for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++)
+			{
+				// other pixels must skip the transparent selector (as TryDifferentialHalf does): it is scored with m_afDecodedAlphas, not the alpha 0 it decodes to
+				if (!boolTransparentPixel && uiSelector == TRANSPARENT_SELECTOR)
+				{
+					continue;
+				}
+				float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], m_afDecodedAlphas[uiPixel],
+													m_pafrgbaSource[uiPixel]);
+				if (fPixelError < afBestPixelErrors[uiPixel])
+				{
+					afBestPixelErrors[uiPixel] = fPixelError;
+					auiBestPixelSelectors[uiPixel] = uiSelector;
+					afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector];
+				}
+			}
+		}
+
+		// add up all of the pixel errors
+		float fBlockError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			fBlockError += afBestPixelErrors[uiPixel];
+		}
+
+		if (fBlockError < m_fError)
+		{
+			m_fError = fBlockError;
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel];
+				m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel];
+			}
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 1 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates1(void)
+	{
+		// radius 1 differential searches on the most likely flip, moving one gray offset by +/-2 per try
+		static const int s_aaiGrayOffsets[4][2] = { { -2, 0 }, { 2, 0 }, { 0, 2 }, { 0, -2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 2 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates2(void)
+	{
+		// radius 1 differential searches on the opposite flip, moving one gray offset by +/-2 per try
+		static const int s_aaiGrayOffsets[4][2] = { { -2, 0 }, { 2, 0 }, { 0, 2 }, { 0, -2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(!m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 3 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates3(void)
+	{
+		// radius 1 differential searches on the most likely flip, moving both gray offsets by +/-2 per try
+		static const int s_aaiGrayOffsets[4][2] = { { -2, -2 }, { -2, 2 }, { 2, -2 }, { 2, 2 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// try version 4 of the degenerate search
+	// degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings
+	// each subsequent version of the degenerate search uses more basecolor movement and is less likely to
+	//		be successful
+	//
+	void Block4x4Encoding_RGB8A1::TryDegenerates4(void)
+	{
+		// radius 1 differential searches on the most likely flip, moving one gray offset by +/-4 per try
+		static const int s_aaiGrayOffsets[4][2] = { { -4, 0 }, { 4, 0 }, { 0, 4 }, { 0, -4 } };
+		for (unsigned int uiTry = 0; uiTry < 4; uiTry++)
+		{
+			TryDifferential(m_boolMostLikelyFlip, 1, s_aaiGrayOffsets[uiTry][0], s_aaiGrayOffsets[uiTry][1]);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits(void)
+	{
+		// dispatch to the bit writer that matches the current m_mode
+		if (m_mode == MODE_ETC1)
+		{
+			SetEncodingBits_ETC1();
+		}
+		else if (m_mode == MODE_T)
+		{
+			SetEncodingBits_T();
+		}
+		else if (m_mode == MODE_H)
+		{
+			SetEncodingBits_H();
+		}
+		else if (m_mode == MODE_PLANAR)
+		{
+			Block4x4Encoding_RGB8::SetEncodingBits_Planar();
+		}
+		else
+		{
+			assert(false);	// no other mode is handled here
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state if ETC1 mode
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void)
+	{
+		// RGB8A1 has no individual mode, so the state must be differential
+		assert(m_boolDiff);
+
+		// first base color, scaled to the 0..31 range per channel
+		const int iBaseRed = m_frgbaColor1.IntRed(31.0f);
+		const int iBaseGreen = m_frgbaColor1.IntGreen(31.0f);
+		const int iBaseBlue = m_frgbaColor1.IntBlue(31.0f);
+
+		// the second base color is stored as a signed delta from the first
+		const int iDeltaRed = m_frgbaColor2.IntRed(31.0f) - iBaseRed;
+		const int iDeltaGreen = m_frgbaColor2.IntGreen(31.0f) - iBaseGreen;
+		const int iDeltaBlue = m_frgbaColor2.IntBlue(31.0f) - iBaseBlue;
+
+		assert(-4 <= iDeltaRed && iDeltaRed <= 3);
+		assert(-4 <= iDeltaGreen && iDeltaGreen <= 3);
+		assert(-4 <= iDeltaBlue && iDeltaBlue <= 3);
+
+		m_pencodingbitsRGB8->differential.red1 = iBaseRed;
+		m_pencodingbitsRGB8->differential.green1 = iBaseGreen;
+		m_pencodingbitsRGB8->differential.blue1 = iBaseBlue;
+
+		m_pencodingbitsRGB8->differential.dred2 = iDeltaRed;
+		m_pencodingbitsRGB8->differential.dgreen2 = iDeltaGreen;
+		m_pencodingbitsRGB8->differential.dblue2 = iDeltaBlue;
+
+		// the code words are written through the "individual" member of the encoding bits
+		m_pencodingbitsRGB8->individual.cw1 = m_uiCW1;
+		m_pencodingbitsRGB8->individual.cw2 = m_uiCW2;
+
+		SetEncodingBits_Selectors();
+
+		// in RGB8A1 encoding bits, opaque replaces differential:
+		// the diff field is set when the block has no punch-through pixels
+		const bool boolOpaqueBit = !m_boolPunchThroughPixels;
+		m_pencodingbitsRGB8->differential.diff = boolOpaqueBit;
+
+		m_pencodingbitsRGB8->individual.flip = m_boolFlip;
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state if T mode
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void)
+	{
+		static const bool SANITY_CHECK = true;
+		// SANITY_CHECK enables the re-check at the end of this function
+		assert(m_mode == MODE_T);
+		assert(m_boolDiff == true);
+		// both colors are converted to integers with a scale of 15
+		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+		// red1 is split over two fields: red1a gets the value shifted right by 2, red1b the unshifted value
+		m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2;
+		m_pencodingbitsRGB8->t.red1b = uiRed1;	// NOTE(review): presumably the field width keeps only the low bits - confirm
+		m_pencodingbitsRGB8->t.green1 = uiGreen1;
+		m_pencodingbitsRGB8->t.blue1 = uiBlue1;
+
+		m_pencodingbitsRGB8->t.red2 = uiRed2;
+		m_pencodingbitsRGB8->t.green2 = uiGreen2;
+		m_pencodingbitsRGB8->t.blue2 = uiBlue2;
+		// m_uiCW1 is split the same way: da gets it shifted right by 1, db the unshifted value
+		m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1;
+		m_pencodingbitsRGB8->t.db = m_uiCW1;
+
+		// in RGB8A1 encoding bits, opaque replaces differential
+		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
+
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+		// with both detect fields cleared, red1 + dred2 is read back through the differential view of the bits
+		// create an invalid R differential to trigger T mode
+		m_pencodingbitsRGB8->t.detect1 = 0;
+		m_pencodingbitsRGB8->t.detect2 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		if (iRed2 >= 4)
+		{
+			m_pencodingbitsRGB8->t.detect1 = 7;	// NOTE(review): presumably pushes the sum above 31 - confirm against the bit layout
+			m_pencodingbitsRGB8->t.detect2 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->t.detect1 = 0;
+			m_pencodingbitsRGB8->t.detect2 = 1;	// NOTE(review): presumably makes dred2 negative so the sum drops below 0 - confirm
+		}
+		// re-read the sum now that the detect fields hold their final values
+		if (SANITY_CHECK)
+		{
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+
+			// make sure red overflows
+			assert(iRed2 < 0 || iRed2 > 31);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state if H mode
+	//
+	// colors and selectors may need to swap in order to generate lsb of distance index
+	//
+	void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void)
+	{
+		static const bool SANITY_CHECK = true;
+		// SANITY_CHECK enables the re-check at the end of this function
+		assert(m_mode == MODE_H);
+		assert(m_boolDiff == true);
+		// both colors are converted to integers with a scale of 15
+		unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f);
+		unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f);
+		unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f);
+
+		unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f);
+		unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f);
+		unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f);
+		// pack each color into one integer so the two colors can be ordered with a single comparison
+		unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1;
+		unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2;
+		// the low bit of m_uiCW1 is not stored in da/db below; swap when exactly one of (color1 < color2) and (low bit is 0) holds
+		bool boolOddDistance = m_uiCW1 & 1;
+		bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance;
+
+		if (boolSwapColors)
+		{
+			m_pencodingbitsRGB8->h.red1 = uiRed2;	// swapped: color 2 goes into the first color's fields
+			m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1;
+			m_pencodingbitsRGB8->h.green1b = uiGreen2;
+			m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3;
+			m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1;
+			m_pencodingbitsRGB8->h.blue1c = uiBlue2;
+
+			m_pencodingbitsRGB8->h.red2 = uiRed1;	// swapped: color 1 goes into the second color's fields
+			m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1;
+			m_pencodingbitsRGB8->h.green2b = uiGreen1;
+			m_pencodingbitsRGB8->h.blue2 = uiBlue1;
+
+			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
+			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->h.red1 = uiRed1;	// not swapped: each color goes into its own fields
+			m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1;
+			m_pencodingbitsRGB8->h.green1b = uiGreen1;
+			m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3;
+			m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1;
+			m_pencodingbitsRGB8->h.blue1c = uiBlue1;
+
+			m_pencodingbitsRGB8->h.red2 = uiRed2;
+			m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1;
+			m_pencodingbitsRGB8->h.green2b = uiGreen2;
+			m_pencodingbitsRGB8->h.blue2 = uiBlue2;
+
+			m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2;
+			m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1;
+		}
+
+		// in RGB8A1 encoding bits, opaque replaces differential
+		m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels;
+
+		Block4x4Encoding_ETC1::SetEncodingBits_Selectors();
+		// when the colors were swapped, the low 16 selector bits are inverted to match
+		if (boolSwapColors)
+		{
+			m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF;
+		}
+
+		// create a valid R differential and an invalid G differential to trigger H mode
+		m_pencodingbitsRGB8->h.detect1 = 0;
+		m_pencodingbitsRGB8->h.detect2 = 0;
+		m_pencodingbitsRGB8->h.detect3 = 0;
+		int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+		int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+		if (iRed2 < 0 || iRed2 > 31)
+		{
+			m_pencodingbitsRGB8->h.detect1 = 1;	// NOTE(review): presumably brings the red sum back into 0..31 - confirm against the bit layout
+		}
+		if (iGreen2 >= 4)
+		{
+			m_pencodingbitsRGB8->h.detect2 = 7;	// NOTE(review): presumably pushes the green sum above 31 - confirm
+			m_pencodingbitsRGB8->h.detect3 = 0;
+		}
+		else
+		{
+			m_pencodingbitsRGB8->h.detect2 = 0;
+			m_pencodingbitsRGB8->h.detect3 = 1;	// NOTE(review): presumably makes dgreen2 negative so the sum drops below 0 - confirm
+		}
+		// re-read both sums now that the detect fields hold their final values
+		if (SANITY_CHECK)
+		{
+			iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2;
+			iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2;
+
+			// make sure red doesn't overflow and green does
+			assert(iRed2 >= 0 && iRed2 <= 31);
+			assert(iGreen2 < 0 || iGreen2 > 31);
+		}
+
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGB8A1_Opaque
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGB8A1_Opaque::PerformIteration(float a_fEffort)
+	{
+		// opaque blocks call the base class searches directly; m_uiEncodingIterations selects the step
+		assert(!m_boolPunchThroughPixels);
+		assert(!m_boolTransparent);
+		assert(!m_boolDone);
+
+		// becomes true when this step is the last one permitted for a_fEffort
+		bool boolLastStep = false;
+
+		switch (m_uiEncodingIterations)
+		{
+		case 0:
+			PerformFirstIteration();
+			break;
+
+		case 1:
+			Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 2:
+			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0);
+			break;
+
+		case 3:
+			Block4x4Encoding_RGB8::TryPlanar(1);
+			break;
+
+		// steps 4 through 7 are the last step when a_fEffort is at or below their threshold
+		case 4:
+			Block4x4Encoding_RGB8::TryTAndH(1);
+			boolLastStep = (a_fEffort <= 49.5f);
+			break;
+
+		case 5:
+			Block4x4Encoding_ETC1::TryDegenerates1();
+			boolLastStep = (a_fEffort <= 59.5f);
+			break;
+
+		case 6:
+			Block4x4Encoding_ETC1::TryDegenerates2();
+			boolLastStep = (a_fEffort <= 69.5f);
+			break;
+
+		case 7:
+			Block4x4Encoding_ETC1::TryDegenerates3();
+			boolLastStep = (a_fEffort <= 79.5f);
+			break;
+
+		// step 8 always ends the search
+		case 8:
+			Block4x4Encoding_ETC1::TryDegenerates4();
+			boolLastStep = true;
+			break;
+
+		default:
+			assert(0);
+			break;
+		}
+
+		if (boolLastStep)
+		{
+			m_boolDone = true;
+		}
+
+		// count the step, then let a perfect encoding end the search as well
+		m_uiEncodingIterations++;
+		SetDoneIfPerfect();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find best initial encoding to ensure block has a valid encoding
+	//
+	void Block4x4Encoding_RGB8A1_Opaque::PerformFirstIteration(void)
+	{
+		// every decoded alpha is 1; the squared alpha error against the source is summed into m_fError
+		// NOTE(review): this sum is replaced by FLT_MAX below before any visible use - confirm it is intended
+		m_fError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_afDecodedAlphas[uiPixel] = 1.0f;
+			const float fAlphaMiss = 1.0f - m_pafrgbaSource[uiPixel].fA;
+			m_fError += fAlphaMiss * fAlphaMiss;
+		}
+
+		CalculateMostLikelyFlip();
+
+		// artificially high error before the first search runs
+		m_fError = FLT_MAX;
+
+		// run the searches in order, skipping the remaining ones once the encoding is done
+		Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0);
+		SetDoneIfPerfect();
+
+		if (!m_boolDone)
+		{
+			Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0);
+			SetDoneIfPerfect();
+		}
+
+		if (!m_boolDone)
+		{
+			Block4x4Encoding_RGB8::TryPlanar(0);
+			SetDoneIfPerfect();
+		}
+
+		if (!m_boolDone)
+		{
+			Block4x4Encoding_RGB8::TryTAndH(0);
+			SetDoneIfPerfect();
+		}
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGB8A1_Transparent
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGB8A1_Transparent::PerformIteration(float )
+	{
+		// a transparent block needs no search: a fixed encoding state is set in this
+		// single iteration and the effort argument is ignored
+		assert(!m_boolOpaque);
+		assert(m_boolTransparent);
+		assert(!m_boolDone);
+		assert(m_uiEncodingIterations == 0);
+
+		const ColorFloatRGBA frgbaDefault = ColorFloatRGBA();
+
+		m_mode = MODE_ETC1;
+		m_boolDiff = true;
+		m_boolFlip = false;
+		m_uiCW1 = m_uiCW2 = 0;
+		m_frgbaColor1 = frgbaDefault;
+		m_frgbaColor2 = frgbaDefault;
+
+		// every pixel gets the transparent selector, a default constructed color and zero alpha
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_auiSelectors[uiPixel] = TRANSPARENT_SELECTOR;
+			m_afrgbaDecodedColors[uiPixel] = frgbaDefault;
+			m_afDecodedAlphas[uiPixel] = 0.0f;
+		}
+
+		CalcBlockError();
+
+		m_uiEncodingIterations++;
+		m_boolDone = true;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h
new file mode 100644
index 0000000..91b9355
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGB8A1.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcErrorMetric.h"
+#include "EtcBlock4x4EncodingBits.h"
+
+namespace Etc
+{
+
+	// ################################################################################
+	// Block4x4Encoding_RGB8A1
+	// RGB8A1 if not completely opaque or transparent
+	// ################################################################################
+
+	class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8
+	{
+	public:
+		// selector value used for transparent pixels
+		static const unsigned int TRANSPARENT_SELECTOR = 2;
+
+		Block4x4Encoding_RGB8A1(void);
+		virtual ~Block4x4Encoding_RGB8A1(void);
+		// initialization prior to encoding a_pafrgbaSource into a_paucEncodingBits
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+									unsigned char *a_paucEncodingBits,
+									ErrorMetric a_errormetric);
+		// initialization from the encoding bits of a previous encoding
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+											ErrorMetric a_errormetric);
+		// perform a single encoding iteration
+		virtual void PerformIteration(float a_fEffort);
+		// write the current encoding state to the encoding bits, dispatching on m_mode
+		virtual void SetEncodingBits(void);
+		// per mode helpers; NOTE(review): presumably called from InitFromEncodingBits() - confirm
+		void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent,
+										unsigned char *a_paucEncodingBits,
+										ColorFloatRGBA *a_pafrgbaSource,
+										ErrorMetric a_errormetric);
+
+		void InitFromEncodingBits_T(void);
+		void InitFromEncodingBits_H(void);
+
+		void PerformFirstIteration(void);
+		// per mode decode helpers, followed by the per mode writers used by SetEncodingBits()
+		void Decode_ETC1(void);
+		void DecodePixels_T(void);
+		void DecodePixels_H(void);
+		void SetEncodingBits_ETC1(void);
+		void SetEncodingBits_T(void);
+		void SetEncodingBits_H(void);
+
+	protected:
+
+		bool m_boolOpaque;				// all source pixels have alpha >= 0.5
+		bool m_boolTransparent;			// all source pixels have alpha < 0.5
+		bool m_boolPunchThroughPixels;	// some source pixels have alpha < 0.5
+
+		static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS];
+
+	private:
+		// these share names with base class search steps; Block4x4Encoding_RGB8A1_Opaque qualifies its calls to reach the base versions
+		void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius,
+								int a_iGrayOffset1, int a_iGrayOffset2);
+		void TryDifferentialHalf(DifferentialTrys::Half *a_phalf);
+
+		void TryT(unsigned int a_uiRadius);
+		void TryT_BestSelectorCombination(void);
+		void TryH(unsigned int a_uiRadius);
+		void TryH_BestSelectorCombination(void);
+		// degenerate searches, numbered in the order they are tried
+		void TryDegenerates1(void);
+		void TryDegenerates2(void);
+		void TryDegenerates3(void);
+		void TryDegenerates4(void);
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGB8A1_Opaque
+	// RGB8A1 if all pixels have alpha==1
+	// ################################################################################
+
+	class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1
+	{
+	public:
+		// runs one step of the search; asserts that the block has no punch through or transparent pixels
+		virtual void PerformIteration(float a_fEffort);
+		// finds the initial encoding; called by PerformIteration() on iteration 0
+		void PerformFirstIteration(void);
+
+	private:
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGB8A1_Transparent
+	// RGB8A1 if all pixels have alpha==0
+	// ################################################################################
+
+	class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1
+	{
+	public:
+		// sets the complete encoding state in a single iteration; the effort argument is unused
+		virtual void PerformIteration(float a_fEffort);
+
+	private:
+
+	};
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp
new file mode 100644
index 0000000..0ca531d
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.cpp
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcBlock4x4Encoding_RGBA8.cpp contains:
+	Block4x4Encoding_RGBA8
+	Block4x4Encoding_RGBA8_Opaque
+	Block4x4Encoding_RGBA8_Transparent
+
+These encoders are used when targeting file format RGBA8.
+
+Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque
+Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent
+Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcBlock4x4Encoding_RGBA8.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGBA8
+	// ####################################################################################################
+
+	float Block4x4Encoding_RGBA8::s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS]	// indexed [table entry][selector], values in units of 1/255
+	{
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -9.0f / 255.0f, -15.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 8.0f / 255.0f, 14.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f, -10.0f / 255.0f, -13.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 9.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 12.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -6.0f / 255.0f, -13.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 12.0f / 255.0f },
+		// in every row selector 3 holds the most negative value and selector 7 the most positive (CalculateA8() relies on this)
+		{ -3.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -12.0f / 255.0f, 2.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f, 11.0f / 255.0f },
+		{ -3.0f / 255.0f, -7.0f / 255.0f,  -9.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 6.0f / 255.0f, 8.0f / 255.0f, 10.0f / 255.0f },
+		{ -4.0f / 255.0f, -7.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -11.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f, 10.0f / 255.0f },
+		// table entries 8 through 11
+		{ -2.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -4.0f / 255.0f,  -8.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 3.0f / 255.0f, 7.0f / 255.0f,  9.0f / 255.0f },
+		{ -2.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 1.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		// table entries 12 through 15
+		{ -3.0f / 255.0f, -4.0f / 255.0f,  -7.0f / 255.0f, -10.0f / 255.0f, 2.0f / 255.0f, 3.0f / 255.0f, 6.0f / 255.0f,  9.0f / 255.0f },
+		{ -1.0f / 255.0f, -2.0f / 255.0f,  -3.0f / 255.0f, -10.0f / 255.0f, 0.0f / 255.0f, 1.0f / 255.0f, 2.0f / 255.0f,  9.0f / 255.0f },
+		{ -4.0f / 255.0f, -6.0f / 255.0f,  -8.0f / 255.0f,  -9.0f / 255.0f, 3.0f / 255.0f, 5.0f / 255.0f, 7.0f / 255.0f,  8.0f / 255.0f },
+		{ -3.0f / 255.0f, -5.0f / 255.0f,  -7.0f / 255.0f,  -9.0f / 255.0f, 2.0f / 255.0f, 4.0f / 255.0f, 6.0f / 255.0f,  8.0f / 255.0f }
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void)
+		// the A8 bits pointer stays null until InitFromSource() or InitFromEncodingBits() assigns it
+		: m_pencodingbitsA8(nullptr)
+	{
+	}
+	// nothing to release: the encoding bits are not owned by this object (no delete appears here)
+	Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {}
+	// ----------------------------------------------------------------------------------------------------
+	// initialization prior to encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits
+	//
+	void Block4x4Encoding_RGBA8::InitFromSource(Block4x4 *a_pblockParent,
+												ColorFloatRGBA *a_pafrgbaSource,
+												unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric)
+	{
+		// the A8 bits come first in a_paucEncodingBits and are followed directly by the RGB8 bits
+		unsigned char *paucRGB8Bits = a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8);
+		Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric);
+		m_pencodingbitsA8 = reinterpret_cast<Block4x4EncodingBits_A8 *>(a_paucEncodingBits);
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(paucRGB8Bits);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// initialization from the encoding bits of a previous encoding
+	// a_pblockParent points to the block associated with this encoding
+	// a_errormetric is used to choose the best encoding
+	// a_pafrgbaSource points to a 4x4 block subset of the source image
+	// a_paucEncodingBits points to the final encoding bits of a previous encoding
+	//
+	void Block4x4Encoding_RGBA8::InitFromEncodingBits(Block4x4 *a_pblockParent,
+														unsigned char *a_paucEncodingBits,
+														ColorFloatRGBA *a_pafrgbaSource,
+														ErrorMetric a_errormetric)
+	{
+		// rebuilds the encoding state (RGB portion, then alpha portion) from existing bits
+		// and finishes by recalculating the block error
+
+		// the A8 bits come first and are followed directly by the RGB8 bits
+		unsigned char *paucRGB8Bits = a_paucEncodingBits + sizeof(Block4x4EncodingBits_A8);
+		m_pencodingbitsA8 = reinterpret_cast<Block4x4EncodingBits_A8 *>(a_paucEncodingBits);
+		m_pencodingbitsRGB8 = reinterpret_cast<Block4x4EncodingBits_RGB8 *>(paucRGB8Bits);
+
+		// init the RGB portion first; the alpha state below has to be set after this call
+		Block4x4Encoding_RGB8::InitFromEncodingBits(a_pblockParent,
+													paucRGB8Bits,
+													a_pafrgbaSource,
+													a_errormetric);
+
+		// A8 header: base (stored as 0..255), multiplier and modifier table index
+		m_fBase = m_pencodingbitsA8->data.base / 255.0f;
+		m_fMultiplier = (float)m_pencodingbitsA8->data.multiplier;
+		m_uiModifierTableIndex = m_pencodingbitsA8->data.table;
+
+		// gather the six selector fields into one value, selectors0 being the most significant
+		typedef unsigned long long int SelectorBits;
+		const SelectorBits ulliPackedSelectors =
+			((SelectorBits)m_pencodingbitsA8->data.selectors0 << 40) |
+			((SelectorBits)m_pencodingbitsA8->data.selectors1 << 32) |
+			((SelectorBits)m_pencodingbitsA8->data.selectors2 << 24) |
+			((SelectorBits)m_pencodingbitsA8->data.selectors3 << 16) |
+			((SelectorBits)m_pencodingbitsA8->data.selectors4 << 8) |
+			(SelectorBits)m_pencodingbitsA8->data.selectors5;
+
+		// each pixel owns 3 bits, pixel 0 in the highest position; decode its alpha and
+		// accumulate the squared alpha error against the source
+		m_fError = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned int uiShift = 45 - (3 * uiPixel);
+			const unsigned int uiSelector = (unsigned int)((ulliPackedSelectors >> uiShift) & (ALPHA_SELECTORS - 1));
+			m_auiAlphaSelectors[uiPixel] = uiSelector;
+
+			m_afDecodedAlphas[uiPixel] = DecodePixelAlpha(m_fBase, m_fMultiplier,
+															m_uiModifierTableIndex, uiSelector);
+
+			const float fAlphaMiss = m_afDecodedAlphas[uiPixel] - m_pafrgbaSource[uiPixel].fA;
+			m_fError += fAlphaMiss * fAlphaMiss;
+		}
+
+		// redo error calc to include alpha
+		CalcBlockError();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	// similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added
+	//
+	void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// the alpha portion is calculated once, on the first iteration only;
+		// a higher effort selects a larger radius for CalculateA8()
+		if (m_uiEncodingIterations == 0)
+		{
+			float fAlphaRadius = 2.0f;
+			if (a_fEffort < 24.9f)
+			{
+				fAlphaRadius = 0.0f;
+			}
+			else if (a_fEffort < 49.9f)
+			{
+				fAlphaRadius = 1.0f;
+			}
+
+			CalculateA8(fAlphaRadius);
+		}
+
+		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// find the best combination of base alpha, multiplier and selectors
+	//
+	// a_fRadius limits the range of base alpha to try
+	//
+	void Block4x4Encoding_RGBA8::CalculateA8(float a_fRadius)
+	{
+		// searches every table entry, with bases and multipliers within a_fRadius of an estimate
+		// find min/max alpha
+		float fMinAlpha = 1.0f;
+		float fMaxAlpha = 0.0f;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			float fAlpha = m_pafrgbaSource[uiPixel].fA;
+
+			// ignore border pixels
+			if (isnan(fAlpha))
+			{
+				continue;
+			}
+
+			if (fAlpha < fMinAlpha)
+			{
+				fMinAlpha = fAlpha;
+			}
+			if (fAlpha > fMaxAlpha)
+			{
+				fMaxAlpha = fAlpha;
+			}
+		}
+		assert(fMinAlpha <= fMaxAlpha);
+
+		float fAlphaRange = fMaxAlpha - fMinAlpha;
+
+		// try each modifier table entry
+		m_fError = FLT_MAX;		// artificially high value
+		for (unsigned int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++)
+		{
+			static const unsigned int MIN_VALUE_SELECTOR = 3;
+			static const unsigned int MAX_VALUE_SELECTOR = 7;
+			// selector 3 holds the most negative modifier of a table row and selector 7 the most positive
+			float fTableEntryCenter = -s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
+
+			float fTableEntryRange = s_aafModifierTable[uiTableEntry][MAX_VALUE_SELECTOR] -
+				s_aafModifierTable[uiTableEntry][MIN_VALUE_SELECTOR];
+			// estimate the base from where zero sits inside the row's range, scaled to the source alpha range
+			float fCenterRatio = fTableEntryCenter / fTableEntryRange;
+
+			float fCenter = fMinAlpha + fCenterRatio*fAlphaRange;
+			fCenter = roundf(255.0f * fCenter) / 255.0f;
+			// bases to try: the estimate +/- a_fRadius steps of 1/255, clamped to 0..1
+			float fMinBase = fCenter - (a_fRadius / 255.0f);
+			if (fMinBase < 0.0f)
+			{
+				fMinBase = 0.0f;
+			}
+
+			float fMaxBase = fCenter + (a_fRadius / 255.0f);
+			if (fMaxBase > 1.0f)
+			{
+				fMaxBase = 1.0f;
+			}
+			// multipliers to try: the estimate +/- a_fRadius, clamped to 1..15; independent of the base, so computed once per table entry
+			float fRangeMultiplier = roundf(fAlphaRange / fTableEntryRange);
+
+			float fMinMultiplier = fRangeMultiplier - a_fRadius;
+			if (fMinMultiplier < 1.0f)
+			{
+				fMinMultiplier = 1.0f;
+			}
+			else if (fMinMultiplier > 15.0f)
+			{
+				fMinMultiplier = 15.0f;
+			}
+
+			float fMaxMultiplier = fRangeMultiplier + a_fRadius;
+			if (fMaxMultiplier < 1.0f)
+			{
+				fMaxMultiplier = 1.0f;
+			}
+			else if (fMaxMultiplier > 15.0f)
+			{
+				fMaxMultiplier = 15.0f;
+			}
+			// NOTE(review): the base step is slightly under 1/255, presumably so rounding cannot skip the last base - confirm
+			for (float fBase = fMinBase; fBase <= fMaxBase; fBase += (0.999999f / 255.0f))
+			{
+
+				for (float fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier += 1.0f)
+				{
+					// find best selector for each pixel
+					unsigned int auiBestSelectors[PIXELS];
+					float afBestAlphaError[PIXELS];
+					float afBestDecodedAlphas[PIXELS];
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						float fBestPixelAlphaError = FLT_MAX;
+						for (unsigned int uiSelector = 0; uiSelector < ALPHA_SELECTORS; uiSelector++)
+						{
+							float fDecodedAlpha = DecodePixelAlpha(fBase, fMultiplier, uiTableEntry, uiSelector);
+
+							// border pixels (NAN) should have zero error
+							float fPixelDeltaAlpha = isnan(m_pafrgbaSource[uiPixel].fA) ?
+															0.0f :
+															fDecodedAlpha - m_pafrgbaSource[uiPixel].fA;
+
+							float fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha;
+
+							if (fPixelAlphaError < fBestPixelAlphaError)
+							{
+								fBestPixelAlphaError = fPixelAlphaError;
+								auiBestSelectors[uiPixel] = uiSelector;
+								afBestAlphaError[uiPixel] = fBestPixelAlphaError;
+								afBestDecodedAlphas[uiPixel] = fDecodedAlpha;
+							}
+						}
+					}
+					// the block error of this combination is the sum of the best per pixel errors
+					float fBlockError = 0.0f;
+					for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+					{
+						fBlockError += afBestAlphaError[uiPixel];
+					}
+					// keep this combination only when it is strictly better than the best one so far
+					if (fBlockError < m_fError)
+					{
+						m_fError = fBlockError;
+
+						m_fBase = fBase;
+						m_fMultiplier = fMultiplier;
+						m_uiModifierTableIndex = uiTableEntry;
+						for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+						{
+							m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel];
+							m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel];
+						}
+					}
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGBA8::SetEncodingBits(void)
+	{
+		// writes the RGB8 portion through the base class, then the A8 portion from the alpha state
+		// set the RGB8 portion
+		Block4x4Encoding_RGB8::SetEncodingBits();
+
+		// A8 header: base scaled to 0..255, modifier table index and multiplier
+		m_pencodingbitsA8->data.base = (unsigned char)roundf(255.0f * m_fBase);
+		m_pencodingbitsA8->data.table = m_uiModifierTableIndex;
+		m_pencodingbitsA8->data.multiplier = (unsigned char)roundf(m_fMultiplier);
+
+		// pack the per pixel selectors 3 bits apart, pixel 0 occupying the highest position
+		unsigned long long int ulliPacked = 0;
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			const unsigned long long int ulliSelector = m_auiAlphaSelectors[uiPixel];
+			ulliPacked |= ulliSelector << (45 - (3 * uiPixel));
+		}
+
+		// store the packed selectors 8 bits apart, most significant part first;
+		// each assignment keeps only what fits in the destination field
+		m_pencodingbitsA8->data.selectors0 = ulliPacked >> 40;
+		m_pencodingbitsA8->data.selectors1 = ulliPacked >> 32;
+		m_pencodingbitsA8->data.selectors2 = ulliPacked >> 24;
+		m_pencodingbitsA8->data.selectors3 = ulliPacked >> 16;
+		m_pencodingbitsA8->data.selectors4 = ulliPacked >> 8;
+		m_pencodingbitsA8->data.selectors5 = ulliPacked;
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGBA8_Opaque
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGBA8_Opaque::PerformIteration(float a_fEffort)
+	{
+		assert(!m_boolDone);
+
+		// first iteration only: set every decoded alpha to 1 and reset the error to 0
+		const bool boolFirstIteration = (m_uiEncodingIterations == 0);
+		if (boolFirstIteration)
+		{
+			for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+			{
+				m_afDecodedAlphas[uiPixel] = 1.0f;
+			}
+			m_fError = 0.0f;
+		}
+
+		Block4x4Encoding_RGB8::PerformIteration(a_fEffort);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGBA8_Opaque::SetEncodingBits(void)
+	{
+		// set the RGB8 portion
+		Block4x4Encoding_RGB8::SetEncodingBits();
+
+		// fixed A8 portion for an opaque block: base 255, table 15, multiplier 15
+		// and 0xFF in every selector field
+		Block4x4EncodingBits_A8 *pbitsA8 = m_pencodingbitsA8;
+		pbitsA8->data.multiplier = 15;
+		pbitsA8->data.table = 15;
+		pbitsA8->data.base = 255;
+		pbitsA8->data.selectors0 = 0xFF;
+		pbitsA8->data.selectors1 = 0xFF;
+		pbitsA8->data.selectors2 = 0xFF;
+		pbitsA8->data.selectors3 = 0xFF;
+		pbitsA8->data.selectors4 = 0xFF;
+		pbitsA8->data.selectors5 = 0xFF;
+	}
+
+	// ####################################################################################################
+	// Block4x4Encoding_RGBA8_Transparent
+	// ####################################################################################################
+
+	// ----------------------------------------------------------------------------------------------------
+	// perform a single encoding iteration
+	// replace the encoding if a better encoding was found
+	// subsequent iterations generally take longer for each iteration
+	// set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort
+	//
+	void Block4x4Encoding_RGBA8_Transparent::PerformIteration(float )
+	{
+		// a transparent block needs no search: this single iteration sets a fixed
+		// encoding state with zero error and ignores the effort argument
+		assert(!m_boolDone);
+		assert(m_uiEncodingIterations == 0);
+
+		const ColorFloatRGBA frgbaDefault = ColorFloatRGBA();
+		for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++)
+		{
+			m_afrgbaDecodedColors[uiPixel] = frgbaDefault;
+			m_afDecodedAlphas[uiPixel] = 0.0f;
+		}
+
+		m_mode = MODE_ETC1;
+		m_boolDiff = true;
+		m_boolFlip = false;
+		m_fError = 0.0f;
+		m_uiEncodingIterations++;
+		m_boolDone = true;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// set the encoding bits based on encoding state
+	//
+	void Block4x4Encoding_RGBA8_Transparent::SetEncodingBits(void)
+	{
+		// set the RGB8 portion
+		Block4x4Encoding_RGB8::SetEncodingBits();
+
+		// fixed A8 portion for a transparent block: base 0, table 0, multiplier 1, all selector fields 0
+		Block4x4EncodingBits_A8 *pbitsA8 = m_pencodingbitsA8;
+		pbitsA8->data.multiplier = 1;
+		pbitsA8->data.table = 0;
+		pbitsA8->data.base = 0;
+		pbitsA8->data.selectors0 = 0;
+		pbitsA8->data.selectors1 = 0;
+		pbitsA8->data.selectors2 = 0;
+		pbitsA8->data.selectors3 = 0;
+		pbitsA8->data.selectors4 = 0;
+		pbitsA8->data.selectors5 = 0;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h
new file mode 100644
index 0000000..9d21e90
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcBlock4x4Encoding_RGBA8.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcBlock4x4Encoding_RGB8.h"
+
+namespace Etc
+{
+	class Block4x4EncodingBits_A8;
+
+	// ################################################################################
+	// Block4x4Encoding_RGBA8
+	// RGBA8 if not completely opaque or transparent
+	// ################################################################################
+
+	class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8
+	{
+	public:
+
+		Block4x4Encoding_RGBA8(void);
+		virtual ~Block4x4Encoding_RGBA8(void);
+
+		virtual void InitFromSource(Block4x4 *a_pblockParent,
+									ColorFloatRGBA *a_pafrgbaSource,
+									unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric);
+
+		virtual void InitFromEncodingBits(Block4x4 *a_pblockParent,
+											unsigned char *a_paucEncodingBits,
+											ColorFloatRGBA *a_pafrgbaSource,
+											ErrorMetric a_errormetric);
+
+		virtual void PerformIteration(float a_fEffort);
+
+		virtual void SetEncodingBits(void);
+
+	protected:
+		// dimensions of the alpha modifier table: 16 rows, one column per 3-bit selector value
+		static const unsigned int MODIFIER_TABLE_ENTRYS = 16;
+		static const unsigned int ALPHA_SELECTOR_BITS = 3;
+		static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS;
+
+		static float s_aafModifierTable[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS];
+
+		void CalculateA8(float a_fRadius);
+
+		Block4x4EncodingBits_A8 *m_pencodingbitsA8;	// A8 portion of Block4x4EncodingBits_RGBA8
+		// alpha state: base, multiplier, modifier table row, and one selector per pixel
+		float m_fBase;
+		float m_fMultiplier;
+		unsigned int m_uiModifierTableIndex;
+		unsigned int m_auiAlphaSelectors[PIXELS];
+
+	private:
+		// a_fBase + a_fMultiplier * modifier table entry, limited to the range [0, 1]
+		inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier,
+										unsigned int a_uiTableIndex, unsigned int a_uiSelector)
+		{
+			const float fModifier = s_aafModifierTable[a_uiTableIndex][a_uiSelector];
+			const float fUnclamped = a_fBase + a_fMultiplier*fModifier;
+			if (fUnclamped < 0.0f)
+			{
+				return 0.0f;
+			}
+			if (fUnclamped > 1.0f)
+			{
+				return 1.0f;
+			}
+
+			return fUnclamped;
+		}
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGBA8_Opaque
+	// RGBA8 if all pixels have alpha==1
+	// ################################################################################
+
+	class Block4x4Encoding_RGBA8_Opaque : public Block4x4Encoding_RGBA8
+	{
+	public:
+		// both functions replace the virtuals declared in Block4x4Encoding_RGBA8;
+		// 'override' makes the compiler reject any signature that drifts from the base class
+		virtual void PerformIteration(float a_fEffort) override;
+		virtual void SetEncodingBits(void) override;
+
+	};
+
+	// ################################################################################
+	// Block4x4Encoding_RGBA8_Transparent
+	// RGBA8 if all pixels have alpha==0
+	// ################################################################################
+
+	class Block4x4Encoding_RGBA8_Transparent : public Block4x4Encoding_RGBA8
+	{
+	public:
+		// both functions replace the virtuals declared in Block4x4Encoding_RGBA8;
+		// 'override' makes the compiler reject any signature that drifts from the base class
+		virtual void PerformIteration(float a_fEffort) override;
+		virtual void SetEncodingBits(void) override;
+
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcDifferentialTrys.cpp b/EtcLib/EtcCodec/EtcDifferentialTrys.cpp
new file mode 100644
index 0000000..0fcc550
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcDifferentialTrys.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcDifferentialTrys.cpp
+
+Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcDifferentialTrys.h"
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct a list of trys (encoding attempts)
+	//
+	// a_frgbaColor1 is the basecolor for the first half
+	// a_frgbaColor2 is the basecolor for the second half
+	// a_pauiPixelMapping1 is the pixel order for the first half
+	// a_pauiPixelMapping2 is the pixel order for the second half
+	// a_uiRadius is the amount to vary the base colors
+	// a_iGrayOffset1 and a_iGrayOffset2 are added to all three integer components of the respective half
+	DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
+										const unsigned int *a_pauiPixelMapping1,
+										const unsigned int *a_pauiPixelMapping2,
+										unsigned int a_uiRadius,
+										int a_iGrayOffset1, int a_iGrayOffset2)
+	{
+		assert(a_uiRadius <= MAX_RADIUS);
+
+		m_boolSeverelyBentColors = false;
+
+		ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR5G5B5();
+		ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR5G5B5();
+
+		// quantize base colors
+		// ensure that trys with a_uiRadius don't overflow
+		int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(31.0f)+a_iGrayOffset1, a_uiRadius);
+		int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius);
+		int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius);
+		int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius);
+		int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius);
+		int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius);
+
+		int iDeltaRed = iRed2 - iRed1;
+		int iDeltaGreen = iGreen2 - iGreen1;
+		int iDeltaBlue = iBlue2 - iBlue1;
+		// limit each component delta (color2 - color1) to [-4, 3] by pulling the two colors together
+		// a delta outside [-8, 7] is also flagged in m_boolSeverelyBentColors
+		{
+			if (iDeltaRed > 3)
+			{
+				if (iDeltaRed > 7)
+				{
+					m_boolSeverelyBentColors = true;
+				}
+
+				iRed1 += (iDeltaRed - 3) / 2;
+				iRed2 = iRed1 + 3;
+				iDeltaRed = 3;
+			}
+			else if (iDeltaRed < -4)
+			{
+				if (iDeltaRed < -8)
+				{
+					m_boolSeverelyBentColors = true;
+				}
+
+				iRed1 += (iDeltaRed + 4) / 2;
+				iRed2 = iRed1 - 4;
+				iDeltaRed = -4;
+			}
+			assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius));
+			assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius));
+			assert(iDeltaRed >= -4 && iDeltaRed <= 3);
+
+			if (iDeltaGreen > 3)
+			{
+				if (iDeltaGreen > 7)
+				{
+					m_boolSeverelyBentColors = true;
+				}
+
+				iGreen1 += (iDeltaGreen - 3) / 2;
+				iGreen2 = iGreen1 + 3;
+				iDeltaGreen = 3;
+			}
+			else if (iDeltaGreen < -4)
+			{
+				if (iDeltaGreen < -8)
+				{
+					m_boolSeverelyBentColors = true;
+				}
+
+				iGreen1 += (iDeltaGreen + 4) / 2;
+				iGreen2 = iGreen1 - 4;
+				iDeltaGreen = -4;
+			}
+			assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius));
+			assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius));
+			assert(iDeltaGreen >= -4 && iDeltaGreen <= 3);
+
+			if (iDeltaBlue > 3)
+			{
+				if (iDeltaBlue > 7)
+				{
+					m_boolSeverelyBentColors = true;
+				}
+
+				iBlue1 += (iDeltaBlue - 3) / 2;
+				iBlue2 = iBlue1 + 3;
+				iDeltaBlue = 3;
+			}
+			else if (iDeltaBlue < -4)
+			{
+				if (iDeltaBlue < -8)
+				{
+					m_boolSeverelyBentColors = true;
+				}
+
+				iBlue1 += (iDeltaBlue + 4) / 2;
+				iBlue2 = iBlue1 - 4;
+				iDeltaBlue = -4;
+			}
+			assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius));
+			assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius));
+			assert(iDeltaBlue >= -4 && iDeltaBlue <= 3);
+		}
+
+		m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius);
+		m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius);
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// reset one half: store the center color, pixel mapping and radius, and start with zero trys
+	void DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, 
+										const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
+	{
+		m_iRed = a_iRed;
+		m_iGreen = a_iGreen;
+		m_iBlue = a_iBlue;
+
+		m_pauiPixelMapping = a_pauiPixelMapping;
+		m_uiRadius = a_uiRadius;
+
+		m_uiTrys = 0;
+		m_ptryBest = nullptr;	// no best try chosen yet; previously this pointer was left indeterminate
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcDifferentialTrys.h b/EtcLib/EtcCodec/EtcDifferentialTrys.h
new file mode 100644
index 0000000..6b1cd9c
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcDifferentialTrys.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+namespace Etc
+{
+
+	class DifferentialTrys
+	{
+	public:
+		// largest a_uiRadius the constructor accepts (it asserts a_uiRadius <= MAX_RADIUS)
+		static const unsigned int MAX_RADIUS = 2;
+
+		DifferentialTrys(ColorFloatRGBA a_frgbaColor1,
+							ColorFloatRGBA a_frgbaColor2,
+							const unsigned int *a_pauiPixelMapping1,
+							const unsigned int *a_pauiPixelMapping2,
+							unsigned int a_uiRadius,
+							int a_iGrayOffset1, int a_iGrayOffset2);
+		// clamp a_i so that it lies at least a_iDistance inside both ends of [0, 31]
+		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
+		{
+			const int iLowest = 0 + a_iDistance;
+			const int iHighest = 31 - a_iDistance;
+
+			if (a_i < iLowest)
+			{
+				return iLowest;
+			}
+
+			// not below the low limit, so only the high limit can still apply
+			return (a_i > iHighest) ? iHighest : a_i;
+		}
+		// one try (encoding attempt) for one half of the block
+		class Try
+		{
+		public:
+			static const unsigned int SELECTORS = 8;	// per half
+
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+			unsigned int m_uiCW;
+			unsigned int m_auiSelectors[SELECTORS];
+			float m_fError;
+		};
+		// the trys gathered for one half of the block
+		class Half
+		{
+		public:
+			// capacity of m_atry; equals (2*MAX_RADIUS + 1) cubed - presumably one try per color offset
+			static const unsigned int MAX_TRYS = 125;
+
+			void Init(int a_iRed, int a_iGreen, int a_iBlue,
+						const unsigned int *a_pauiPixelMapping,
+						unsigned int a_uiRadius);
+
+			// center of trys
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+
+			const unsigned int *m_pauiPixelMapping;
+			unsigned int m_uiRadius;
+
+			unsigned int m_uiTrys;
+			Try m_atry[MAX_TRYS];
+
+			Try *m_ptryBest;
+		};
+
+		Half m_half1;
+		Half m_half2;
+
+		bool m_boolSeverelyBentColors;
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcErrorMetric.h b/EtcLib/EtcCodec/EtcErrorMetric.h
new file mode 100644
index 0000000..29bb33b
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcErrorMetric.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace Etc
+{
+
+	enum ErrorMetric
+	{
+		RGBA,
+		REC709,
+		NUMERIC,
+		NORMALXYZ,
+		// count of the metrics listed above; not a metric itself
+		ERROR_METRICS,
+		// alternate name for REC709 (same enumerator value)
+		BT709 = REC709
+	};
+
+	inline const char *ErrorMetricToString(ErrorMetric errorMetric)
+	{
+		// translate an error metric into a printable string literal
+		//
+		// each metric is matched by enumerator, so the mapping does not depend on
+		// the numeric values assigned in the enum
+		//
+		// ERROR_METRICS (the count) and any unrecognized value yield "UNKNOWN"
+
+		const char *pstrName =
+			(errorMetric == RGBA) ? "RGBA" :
+			(errorMetric == REC709) ? "REC709" :
+			(errorMetric == NUMERIC) ? "NUMERIC" :
+			(errorMetric == NORMALXYZ) ? "NORMALXYZ" :
+			"UNKNOWN";
+		return pstrName;
+	}
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcIndividualTrys.cpp b/EtcLib/EtcCodec/EtcIndividualTrys.cpp
new file mode 100644
index 0000000..951edec
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcIndividualTrys.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcIndividualTrys.cpp
+
+Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcIndividualTrys.h"
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct a list of trys (encoding attempts)
+	//
+	// a_frgbaColor1 is the basecolor for the first half
+	// a_frgbaColor2 is the basecolor for the second half
+	// a_pauiPixelMapping1 is the pixel order for the first half
+	// a_pauiPixelMapping2 is the pixel order for the second half
+	// a_uiRadius is the amount to vary the base colors
+	//
+	IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2,
+									const unsigned int *a_pauiPixelMapping1,
+									const unsigned int *a_pauiPixelMapping2,
+									unsigned int a_uiRadius)
+	{
+		assert(a_uiRadius <= MAX_RADIUS);
+
+		// quantize the requested base colors with QuantizeR4G4B4()
+		ColorFloatRGBA frgbaBase1 = a_frgbaColor1.QuantizeR4G4B4();
+		ColorFloatRGBA frgbaBase2 = a_frgbaColor2.QuantizeR4G4B4();
+		const int iMargin = (int)a_uiRadius;
+
+		// keep every center component iMargin away from 0 and 15 so that trys don't overflow
+		int iCenterRed1 = MoveAwayFromEdge(frgbaBase1.IntRed(15.0f), iMargin);
+		int iCenterGreen1 = MoveAwayFromEdge(frgbaBase1.IntGreen(15.0f), iMargin);
+		int iCenterBlue1 = MoveAwayFromEdge(frgbaBase1.IntBlue(15.0f), iMargin);
+		int iCenterRed2 = MoveAwayFromEdge(frgbaBase2.IntRed(15.0f), iMargin);
+		int iCenterGreen2 = MoveAwayFromEdge(frgbaBase2.IntGreen(15.0f), iMargin);
+		int iCenterBlue2 = MoveAwayFromEdge(frgbaBase2.IntBlue(15.0f), iMargin);
+
+		m_half1.Init(iCenterRed1, iCenterGreen1, iCenterBlue1, a_pauiPixelMapping1, a_uiRadius);
+		m_half2.Init(iCenterRed2, iCenterGreen2, iCenterBlue2, a_pauiPixelMapping2, a_uiRadius);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// reset one half: store the center color, pixel mapping and radius, and start with zero trys
+	void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue,
+									const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius)
+	{
+		m_iRed = a_iRed;
+		m_iGreen = a_iGreen;
+		m_iBlue = a_iBlue;
+
+		m_pauiPixelMapping = a_pauiPixelMapping;
+		m_uiRadius = a_uiRadius;
+
+		m_uiTrys = 0;
+		m_ptryBest = nullptr;	// no best try chosen yet; previously this pointer was left indeterminate
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcIndividualTrys.h b/EtcLib/EtcCodec/EtcIndividualTrys.h
new file mode 100644
index 0000000..49170d4
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcIndividualTrys.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+namespace Etc
+{
+
+	class IndividualTrys
+	{
+	public:
+		// largest a_uiRadius the constructor accepts (it asserts a_uiRadius <= MAX_RADIUS)
+		static const unsigned int MAX_RADIUS = 1;
+
+		IndividualTrys(ColorFloatRGBA a_frgbaColor1,
+						ColorFloatRGBA a_frgbaColor2,
+						const unsigned int *a_pauiPixelMapping1,
+						const unsigned int *a_pauiPixelMapping2,
+						unsigned int a_uiRadius);
+		// clamp a_i so that it lies at least a_iDistance inside both ends of [0, 15]
+		inline static int MoveAwayFromEdge(int a_i, int a_iDistance)
+		{
+			const int iLowest = 0 + a_iDistance;
+			const int iHighest = 15 - a_iDistance;
+
+			if (a_i < iLowest)
+			{
+				return iLowest;
+			}
+
+			// not below the low limit, so only the high limit can still apply
+			return (a_i > iHighest) ? iHighest : a_i;
+		}
+		// one try (encoding attempt) for one half of the block
+		class Try
+		{
+		public:
+			static const unsigned int SELECTORS = 8;	// per half
+
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+			unsigned int m_uiCW;
+			unsigned int m_auiSelectors[SELECTORS];
+			float m_fError;
+		};
+		// the trys gathered for one half of the block
+		class Half
+		{
+		public:
+			// capacity of m_atry; equals (2*MAX_RADIUS + 1) cubed - presumably one try per color offset
+			static const unsigned int MAX_TRYS = 27;
+
+			void Init(int a_iRed, int a_iGreen, int a_iBlue,
+						const unsigned int *a_pauiPixelMapping,
+						unsigned int a_uiRadius);
+
+			// center of trys
+			int m_iRed;
+			int m_iGreen;
+			int m_iBlue;
+
+			const unsigned int *m_pauiPixelMapping;
+			unsigned int m_uiRadius;
+
+			unsigned int m_uiTrys;
+			Try m_atry[MAX_TRYS];
+
+			Try *m_ptryBest;
+		};
+
+		Half m_half1;
+		Half m_half2;
+
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+} // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcSortedBlockList.cpp b/EtcLib/EtcCodec/EtcSortedBlockList.cpp
new file mode 100644
index 0000000..7f4f56e
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcSortedBlockList.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+EtcSortedBlockList.cpp
+
+SortedBlockList is a list of 4x4 blocks that can be used by the "effort" system to prioritize
+the encoding of the 4x4 blocks.
+
+The sorting is done with buckets, where each bucket covers one range of 4x4 block error values
+
+*/
+
+#include "EtcConfig.h"
+#include "EtcSortedBlockList.h"
+
+#include "EtcBlock4x4.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// construct an empty list
+	//
+	// allocate enough memory to add all of the image's 4x4 blocks later
+	// allocate enough buckets to sort the blocks
+	//
+	SortedBlockList::SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets)
+	{
+		m_uiImageBlocks = a_uiImageBlocks;
+		m_iBuckets = (int)a_uiBuckets;
+
+		m_uiAddedBlocks = 0;
+		m_uiSortedBlocks = 0;
+		m_palinkPool = new Link[m_uiImageBlocks];
+		m_pabucket = new Bucket[m_iBuckets];
+		m_fMaxError = 0.0f;
+		m_plinkFirst = nullptr;	// sorted list is empty until Sort() runs; keeps GetLinkToFirstBlock() defined
+		m_plinkLast = nullptr;
+		InitBuckets();
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// release the bucket array and the link pool that the constructor allocated
+	SortedBlockList::~SortedBlockList(void)
+	{
+		delete[] m_pabucket;
+		delete[] m_palinkPool;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+    // add a 4x4 block to the list
+	// the 4x4 block will be sorted later
+	//
+    void SortedBlockList::AddBlock(Block4x4 *a_pblock)
+    {
+        // the constructor sized the pool for exactly m_uiImageBlocks links
+        assert(m_uiAddedBlocks < m_uiImageBlocks);
+        m_palinkPool[m_uiAddedBlocks++].Init(a_pblock);
+    }
+
+	// ----------------------------------------------------------------------------------------------------
+	// sort all of the 4x4 blocks that have been added to the list
+	//
+	// first, determine the maximum error, then assign an error range to each bucket
+	// next, determine which bucket each 4x4 block belongs to based on the 4x4 block's error
+	// add the 4x4 block to the appropriate bucket
+	// lastly, walk thru the buckets and add each bucket to a sorted linked list
+	//
+	// the resultant sorting is an approximate sorting from most to least error
+	//
+    void SortedBlockList::Sort(void)
+    {
+		assert(m_uiAddedBlocks == m_uiImageBlocks);
+        InitBuckets();
+
+        // find max block error
+        m_fMaxError = -1.0f;
+
+        for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
+        {
+            Link *plinkBlock = &m_palinkPool[uiLink];
+
+            float fBlockError = plinkBlock->GetBlock()->GetError();
+            if (fBlockError > m_fMaxError)
+            {
+                m_fMaxError = fBlockError;
+            }
+        }
+        // prevent divide by zero or divide by negative
+        if (m_fMaxError <= 0.0f)
+        {
+            m_fMaxError = 1.0f;
+        }
+		//used for debugging
+		//int numDone = 0;
+        // put all of the blocks with unfinished encodings into the appropriate bucket
+		m_uiSortedBlocks = 0;
+        for (unsigned int uiLink = 0; uiLink < m_uiAddedBlocks; uiLink++)
+        {
+            Link *plinkBlock = &m_palinkPool[uiLink];
+
+			// if the encoding is done, don't add it to the list
+			if (plinkBlock->GetBlock()->GetEncoding()->IsDone())
+			{
+				//numDone++;
+				continue;
+			}
+
+            // calculate the appropriate sort bucket
+            float fBlockError = plinkBlock->GetBlock()->GetError();
+            int iBucket = (int) floorf(m_iBuckets * fBlockError / m_fMaxError);
+            // clamp to bucket index
+            iBucket = iBucket < 0 ? 0 : iBucket >= m_iBuckets ? m_iBuckets - 1 : iBucket;
+
+            // append the block to the end of its bucket, so a bucket keeps its blocks in pool order
+			{
+				Bucket *pbucket = &m_pabucket[iBucket];
+				if (pbucket->plinkLast)
+				{
+					pbucket->plinkLast->SetNext(plinkBlock);
+					pbucket->plinkLast = plinkBlock;
+				}
+				else
+				{
+					pbucket->plinkFirst = pbucket->plinkLast = plinkBlock;
+				}
+				plinkBlock->SetNext(nullptr);
+			}
+
+			m_uiSortedBlocks++;
+            // debug output, disabled by the constant condition
+            if (0)
+            {
+                printf("%u: e=%.3f\n", uiLink, fBlockError);
+                Print();
+                printf("\n\n\n");
+            }
+        }
+		//printf("num blocks already done: %d\n",numDone);
+		// chain the non-empty buckets into one list, from the highest bucket index down to the lowest
+		m_plinkFirst = nullptr;
+		m_plinkLast = nullptr;
+		for (int iBucket = m_iBuckets - 1; iBucket >= 0; iBucket--)
+		{
+			Bucket *pbucket = &m_pabucket[iBucket];
+
+			if (pbucket->plinkFirst)
+			{
+				if (m_plinkFirst == nullptr)
+				{
+					m_plinkFirst = pbucket->plinkFirst;
+				}
+				else
+				{
+					assert(pbucket->plinkLast->GetNext() == nullptr);
+					m_plinkLast->SetNext(pbucket->plinkFirst);
+				}
+
+				m_plinkLast = pbucket->plinkLast;
+			}
+		}
+		// m_plinkFirst and m_plinkLast stay nullptr when no unfinished block was found
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// clear all of the buckets.  normally done in preparation for a sort
+	//
+	void SortedBlockList::InitBuckets(void)
+    {
+        // an empty bucket has neither a first nor a last link
+        for (int iIndex = 0; iIndex < m_iBuckets; ++iIndex)
+        {
+            Bucket &bucket = m_pabucket[iIndex];
+            bucket.plinkLast = nullptr;
+            bucket.plinkFirst = nullptr;
+        }
+    }
+
+    // ----------------------------------------------------------------------------------------------------
+    // print out the list of sorted 4x4 blocks
+	// normally used for debugging
+	//
+    void SortedBlockList::Print(void)
+    {
+        for (int iBucket = m_iBuckets - 1; iBucket >= 0; --iBucket)
+        {
+            const Bucket &bucket = m_pabucket[iBucket];
+
+            // count only this bucket's links: once Sort() has chained the buckets together,
+            // the last link of a bucket can point on to the first link of another bucket
+            unsigned int uiCount = 0;
+            for (Link *plink = bucket.plinkFirst; plink != nullptr; plink = plink->GetNext())
+            {
+                uiCount++;
+                if (plink == bucket.plinkLast)
+                {
+                    break;
+                }
+            }
+            const float fLowerError = m_fMaxError * iBucket / m_iBuckets;
+            const float fLowerRMS = sqrtf(fLowerError / (4.0f*16.0f));
+            printf("%3d: e=%.3f rms=%.6f %u\n", iBucket, fLowerError, fLowerRMS, uiCount);
+        }
+    }
+
+    // ----------------------------------------------------------------------------------------------------
+    //
+
+}   // namespace Etc
diff --git a/EtcLib/EtcCodec/EtcSortedBlockList.h b/EtcLib/EtcCodec/EtcSortedBlockList.h
new file mode 100644
index 0000000..8ebb978
--- /dev/null
+++ b/EtcLib/EtcCodec/EtcSortedBlockList.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace Etc
+{
+	class Block4x4;
+
+    class SortedBlockList
+    {
+    public:
+		// one node of the singly linked block list; it refers to a Block4x4 but never deletes it
+		class Link
+		{
+		public:
+			inline void Init(Block4x4 *a_pblock)
+			{
+				m_pblock = a_pblock;
+				m_plinkNext = nullptr;
+			}
+
+			inline Block4x4 * GetBlock(void)
+			{
+				return m_pblock;
+			}
+
+			inline void SetNext(Link *a_plinkNext)
+			{
+				m_plinkNext = a_plinkNext;
+			}
+
+			inline Link * GetNext(void)
+			{
+				return m_plinkNext;
+			}
+			// follow up to a_uiSteps next-links; returns nullptr if the list ends first
+			inline Link * Advance(unsigned int a_uiSteps = 1)
+			{
+				Link *plink = this;
+
+				for (unsigned int uiStep = 0; uiStep < a_uiSteps; uiStep++)
+				{
+					if (plink == nullptr)
+					{
+						break;
+					}
+
+					plink = plink->m_plinkNext;
+				}
+
+				return plink;
+			}
+
+		private:
+			Block4x4 *m_pblock;
+			Link *m_plinkNext;
+		};
+
+		SortedBlockList(unsigned int a_uiImageBlocks, unsigned int a_uiBuckets);
+		~SortedBlockList(void);
+		// copying is disabled: the destructor delete[]s m_palinkPool and m_pabucket,
+		// so a member-wise copy would free both arrays twice
+		SortedBlockList(const SortedBlockList &) = delete;
+		SortedBlockList & operator=(const SortedBlockList &) = delete;
+
+        void AddBlock(Block4x4 *a_pblock);
+        void Sort(void);
+
+		inline Link * GetLinkToFirstBlock(void)
+		{
+			return m_plinkFirst;
+		}
+
+		inline unsigned int GetNumberOfAddedBlocks(void)
+		{
+			return m_uiAddedBlocks;
+		}
+
+		inline unsigned int GetNumberOfSortedBlocks(void)
+		{
+			return m_uiSortedBlocks;
+		}
+
+		void Print(void);
+
+	private:
+        void InitBuckets(void);
+
+        class Bucket
+        {
+        public:
+            Link *plinkFirst;
+            Link *plinkLast;
+        };
+
+        unsigned int m_uiImageBlocks;
+        int m_iBuckets;
+
+		unsigned int m_uiAddedBlocks;
+		unsigned int m_uiSortedBlocks;
+		Link *m_palinkPool;
+        Bucket *m_pabucket;
+        float m_fMaxError;
+
+		Link *m_plinkFirst;
+		Link *m_plinkLast;
+
+    };
+
+} // namespace Etc
diff --git a/EtcTool/Args.txt b/EtcTool/Args.txt
new file mode 100644
index 0000000..d69522b
--- /dev/null
+++ b/EtcTool/Args.txt
@@ -0,0 +1,7 @@
+C:\Users\BSI\Desktop\etc2comp\googleTest.png
+-format RGB8
+-errormetric rgba
+-output ../../EncodedImages/googleTest.ktx
+-analyze ../../Analysis/googleTest
+-verbose
+-effort 0
diff --git a/EtcTool/CMakeLists.txt b/EtcTool/CMakeLists.txt
new file mode 100644
index 0000000..ef79cb0
--- /dev/null
+++ b/EtcTool/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright 2015 The Etc2Comp Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+project(EtcTool)
+include_directories(../EtcLib/Etc)
+include_directories(../EtcLib/EtcCodec)
+include_directories(../third_party/lodepng)
+# build EtcTool from every header/source in this directory plus the lodepng sources
+file(GLOB SOURCES
+	${PROJECT_SOURCE_DIR}/*.h
+	${PROJECT_SOURCE_DIR}/*.cpp
+	../third_party/lodepng/*.h
+	../third_party/lodepng/*.cpp)
+add_executable(EtcTool ${SOURCES})
+# link the executable against EtcLib
+target_link_libraries (EtcTool EtcLib)
+
diff --git a/EtcTool/EtcAnalysis.cpp b/EtcTool/EtcAnalysis.cpp
new file mode 100644
index 0000000..4181e07
--- /dev/null
+++ b/EtcTool/EtcAnalysis.cpp
@@ -0,0 +1,410 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+#define _CRT_SECURE_NO_WARNINGS (1)
+#endif
+
+#include "EtcAnalysis.h"
+
+#include "EtcTool.h"
+#include "EtcComparison.h"
+#include "Etc.h"
+#include "EtcFile.h"
+#include "EtcMath.h"
+#include "EtcImage.h"
+#include "EtcBlock4x4.h"
+
+#include "lodepng.h"
+#include <stdlib.h>
+#include <cmath> //sqrt fn()
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// Writes Analysis.txt plus the Decoded.png and Modes.png renderings of a_pimage into a_pstrOutputFolder.
+	Analysis::Analysis(Image *a_pimage, const char *a_pstrOutputFolder)
+	{
+		// NOTE: only the a_pstrOutputFolder pointer is stored, not a copy of the string
+		m_pimage = a_pimage;
+		m_pstrOutputFolder = a_pstrOutputFolder;
+		m_uiComparisons = 0;
+
+		CreateNewDir(a_pstrOutputFolder);
+
+		// write log file
+		{
+			char strFilename[200];
+			snprintf(strFilename, sizeof(strFilename), "%s%cAnalysis.txt", m_pstrOutputFolder, ETC_PATH_SLASH);
+			// snprintf bounds the write: an over-long folder name is truncated instead of overrunning the buffer
+			FILE *pfileTxt = fopen(strFilename, "wt");
+			if (pfileTxt == nullptr)
+			{
+				printf("Error: couldn't create analysis log file (%s)\n", strFilename);
+				exit(1);
+			}
+
+			float fImageError = m_pimage->GetError();
+			unsigned int uiImagePixels = m_pimage->GetSourceWidth() * m_pimage->GetSourceHeight();
+
+			// stats always go to the analysis file, and also to stdout when verbose output is enabled
+			int numOutputs = 1;
+			FILE *apfile[2];
+			apfile[0] = pfileTxt;
+
+			if (m_pimage->m_bVerboseOutput)
+			{
+				apfile[1] = stdout;
+				numOutputs++;
+			}
+
+			for (int i = 0; i < numOutputs; i++)
+			{
+				// the second ConvertErrorToPSNR argument is the component count the image error is averaged over
+				if (a_pimage->GetFormat() == Image::Format::R11 || a_pimage->GetFormat() == Image::Format::SIGNED_R11)
+				{
+					fprintf(apfile[i], "PSNR(r) = %.4f\n", ConvertErrorToPSNR(fImageError, 1 * uiImagePixels));
+				}
+				else if (a_pimage->GetFormat() == Image::Format::RG11 || a_pimage->GetFormat() == Image::Format::SIGNED_RG11)
+				{
+					fprintf(apfile[i], "PSNR(rg) = %.4f\n", ConvertErrorToPSNR(fImageError, 2 * uiImagePixels));
+				}
+				else
+				{
+					int iComponents=3;
+					if (a_pimage->GetErrorMetric() == ErrorMetric::REC709)
+					{
+						iComponents = (int)Block4x4Encoding::LUMA_WEIGHT + 2;
+					}
+					fprintf(apfile[i], "PSNR(rgb) = %.4f\n", ConvertErrorToPSNR(fImageError, iComponents * uiImagePixels));
+					fprintf(apfile[i], "PSNR(rgba) = %.4f\n", ConvertErrorToPSNR(fImageError, (iComponents+1) * uiImagePixels));
+				}
+
+				fprintf(apfile[i], "EncodeTime = %.3f seconds\n", (float)m_pimage->GetEncodingTimeMs() / 1000.0f);
+			}
+
+
+			fclose(pfileTxt);
+		}
+
+		// scale == 1
+		DrawImage(m_pimage, m_pstrOutputFolder, false);
+
+		// scale == 2, with modes
+		DrawImage(m_pimage, m_pstrOutputFolder, true);
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Adds a comparison against the reference file a_pstrFilename; exits if MAX_COMPARISONS are already stored.
+	void Analysis::Compare(const char *a_pstrFilename, int a_iPixelX, int a_iPixelY)
+	{
+		// slot (and index) the new comparison will occupy
+		const unsigned int uiSlot = m_uiComparisons;
+		if (!(uiSlot < MAX_COMPARISONS))
+		{
+			printf("Error: too many comparisons\n");
+			exit(1);
+		}
+
+		m_apcomparison[uiSlot] = new Comparison(this, uiSlot, a_pstrFilename, a_iPixelX, a_iPixelY);
+		m_uiComparisons = uiSlot + 1;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Renders the decoded pixels of a_pimage into a 32-bit RGBA PNG inside a_pstrOutputFolder.
+	//
+	//   a_pimage            image whose blocks are drawn
+	//   a_pstrOutputFolder  folder that receives the PNG
+	//   a_boolDrawModes     false: write Decoded.png at 1x scale
+	//                       true:  write Modes.png at 2x scale, with each block's encoding
+	//                              mode drawn on top of its pixels
+	//
+	// Prints an error and exits the process if the PNG cannot be written.
+	void Analysis::DrawImage(Image *a_pimage, const char *a_pstrOutputFolder, bool a_boolDrawModes)
+	{
+		// the mode overlay is drawn on a 2x enlargement of the image
+		const unsigned int uiScale = a_boolDrawModes ? 2 : 1;
+		const unsigned int uiPngWidth = uiScale * a_pimage->GetExtendedWidth();
+		const unsigned int uiPngHeight = uiScale * a_pimage->GetExtendedHeight();
+
+		// R11 formats are rendered as gray (red replicated into green and blue)
+		const bool bGrayscale = a_pimage->GetFormat() == Image::Format::R11 ||
+								a_pimage->GetFormat() == Image::Format::SIGNED_R11;
+
+		// 4 bytes (RGBA) per output pixel
+		unsigned char* paucPngPixels = new unsigned char[uiPngWidth * uiPngHeight * 4];
+		assert(paucPngPixels);
+
+		ColorR8G8B8A8 *pargba8PngPixels = (ColorR8G8B8A8 *)paucPngPixels;
+
+		// draw every block into the PNG buffer
+		for (unsigned int uiBlock = 0; uiBlock < a_pimage->GetNumberOfBlocks(); uiBlock++)
+		{
+			Block4x4 *pblock = &a_pimage->GetBlocks()[uiBlock];
+
+			DrawBlockPixels(pblock, pargba8PngPixels, uiPngWidth, bGrayscale, uiScale);
+
+			if (a_boolDrawModes)
+			{
+				// overlay the glyph for this block's encoding mode
+				DrawBlockMode2x(pblock, pargba8PngPixels, uiPngWidth);
+			}
+		}
+
+		// snprintf bounds the write: an over-long folder name is truncated instead of overflowing
+		char strFilename[200];
+		if (a_boolDrawModes)
+		{
+			snprintf(strFilename, sizeof(strFilename), "%s%cModes.png", a_pstrOutputFolder, ETC_PATH_SLASH);
+		}
+		else
+		{
+			snprintf(strFilename, sizeof(strFilename), "%s%cDecoded.png", a_pstrOutputFolder, ETC_PATH_SLASH);
+		}
+
+		unsigned iResult = lodepng_encode32_file(strFilename, paucPngPixels, uiPngWidth, uiPngHeight);
+
+		// the pixel buffer was previously never released; it is not needed after encoding
+		delete[] paucPngPixels;
+
+		// any non-zero lodepng result is treated as a failure
+		if (iResult != 0)
+		{
+			if (a_boolDrawModes)
+			{
+				printf("Error couldn't write modes image (%s)\n", strFilename);
+			}
+			else
+			{
+				printf("Error couldn't write decoded image (%s)\n", strFilename);
+			}
+
+			exit(1);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Draws the decoded pixels of a_pblock into a_pargba8Output, each as an a_uiScale x a_uiScale square.
+	// Nothing outside the block's scaled square is written.
+	//
+	//   a_pblock         block whose decoded colors and alphas are drawn
+	//   a_pargba8Output  first pixel of the output image
+	//   a_uiOutputWidth  number of pixels per output row
+	//   a_bGrayscale     when set, the red value is also written to green and blue
+	//   a_uiScale        enlargement factor applied to the block position and the pixel size
+	void Analysis::DrawBlockPixels(Block4x4 *a_pblock,
+									ColorR8G8B8A8 *a_pargba8Output,
+									unsigned int a_uiOutputWidth,
+									bool a_bGrayscale,
+									unsigned int a_uiScale)
+	{
+
+		// where the block's upper left corner lands in the output
+		const unsigned int uiOriginH = a_uiScale * a_pblock->GetSourceH();
+		const unsigned int uiOriginV = a_uiScale * a_pblock->GetSourceV();
+		ColorR8G8B8A8 *prgba8Origin = a_pargba8Output + (uiOriginV*a_uiOutputWidth + uiOriginH);
+
+		// decoded values for the block's pixels
+		ColorFloatRGBA *pafrgbaColors = a_pblock->GetDecodedColors();
+		float *pafAlphas = a_pblock->GetDecodedAlphas();
+
+		for (unsigned int uiPixel = 0; uiPixel < Block4x4::PIXELS; uiPixel++)
+		{
+			ColorFloatRGBA &frgba = pafrgbaColors[uiPixel];
+
+			// scale the float channels to the 0..255 range
+			const int iRed = frgba.IntRed(255.0f);
+			const int iGreen = a_bGrayscale ? iRed : frgba.IntGreen(255.0f);
+			const int iBlue = a_bGrayscale ? iRed : frgba.IntBlue(255.0f);
+			const int iAlpha = (int) roundf((255.0f*pafAlphas[uiPixel]));
+
+			// uiPixel % 4 selects the row inside the block, uiPixel / 4 the column
+			const unsigned int uiFirstRow = a_uiScale * (uiPixel % 4);
+			const unsigned int uiFirstColumn = a_uiScale * (uiPixel / 4);
+
+			// fill the a_uiScale x a_uiScale square that represents this pixel
+			for (unsigned int uiRow = 0; uiRow < a_uiScale; uiRow++)
+			{
+				ColorR8G8B8A8 *prgba8Row = prgba8Origin + (uiFirstRow + uiRow)*a_uiOutputWidth + uiFirstColumn;
+
+				for (unsigned int uiColumn = 0; uiColumn < a_uiScale; uiColumn++)
+				{
+					ColorR8G8B8A8 &rgba8 = prgba8Row[uiColumn];
+
+					rgba8.ucR = (unsigned char)iRed;
+					rgba8.ucG = (unsigned char)iGreen;
+					rgba8.ucB = (unsigned char)iBlue;
+					rgba8.ucA = (unsigned char)iAlpha;
+				}
+			}
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Overlays, in gray, a top/left outline and a glyph for the block's encoding mode on its 2x-scaled pixels.
+	void Analysis::DrawBlockMode2x(Block4x4 *a_pblock,
+									ColorR8G8B8A8 *a_pargba8Output,
+									unsigned int a_uiOutputWidth)
+	{
+		static const unsigned int SCALE = 2;
+
+		typedef struct
+		{
+			int iH;
+			int iV;
+		} PixelCoord;
+		// Each table lists pixel offsets inside the block's output area and ends with a { -1,-1 } sentinel.
+		static const PixelCoord s_apixelcoordOutline[] = {
+			{ 0,0 },{ 1,0 },{ 2,0 },{ 3,0 },{ 4,0 },{ 5,0 },{ 6,0 },{ 7,0 },
+			{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,5 },{ 0,6 },{ 0,7 },{ -1,-1 }
+		};
+		// dotted vertical line (drawn for ETC1 when GetFlip() is false)
+		static const PixelCoord s_apixelcoordFlip0[] =
+		{
+			{ 4,2 },{ 4,4 },{ 4,6 },{ -1,-1 }
+		};
+		// dotted horizontal line (drawn for ETC1 when GetFlip() is true)
+		static const PixelCoord s_apixelcoordFlip1[] =
+		{
+			{ 2,4 },{ 4,4 },{ 6,4 },{ -1,-1 }
+		};
+		// vertical line with a gap in the middle (differential ETC1 with severely bent colors, GetFlip() false)
+		static const PixelCoord s_apixelcoordBentDiffFlip0[] =
+		{
+			{ 4,2 },{ 4,3 },{ 4,5 },{ 4,6 },{ -1,-1 }
+		};
+		// horizontal line with a gap in the middle (differential ETC1 with severely bent colors, GetFlip() true)
+		static const PixelCoord s_apixelcoordBentDiffFlip1[] =
+		{
+			{ 2,4 },{ 3,4 },{ 5,4 },{ 6,4 },{ -1,-1 }
+		};
+		// four dots near the corners, added for non-differential ETC1 blocks
+		static const PixelCoord s_apixelcoordIndividual[] =
+		{
+			{ 1,1 },{ 1,7 },{ 7,1 },{ 7,7 },{ -1,-1 }
+		};
+		// letter T
+		static const PixelCoord s_apixelcoordT[] =
+		{
+			{ 3,2 },{ 5,2 },
+			{ 4,2 },{ 4,3 },{ 4,4 },{ 4,5 },{ 4,6 },{ -1,-1 }
+		};
+		// letter H
+		static const PixelCoord s_apixelcoordH[] =
+		{
+			{ 4,4 },
+			{ 3,2 },{ 3,3 },{ 3,4 },{ 3,5 },{ 3,6 },
+			{ 5,2 },{ 5,3 },{ 5,4 },{ 5,5 },{ 5,6 },{ -1,-1 }
+		};
+		// letter P
+		static const PixelCoord s_apixelcoordPlanar[] =
+		{
+			{ 4,2 },{ 5,2 },{ 5,3 },{ 5,4 },{ 4,4 },
+			{ 3,2 },{ 3,3 },{ 3,4 },{ 3,5 },{ 3,6 },{ -1,-1 }
+		};
+		// question mark, used for any mode the switch below does not handle explicitly
+		static const PixelCoord s_apixelcoordUnknown[] =
+		{
+			{ 3,2 },{ 4,2 },{ 5,2 },{ 5,3 },{ 5,4 },{ 4,4 },{ 4,6 },{ -1,-1 }
+		};
+
+		// output pixel coord of upper left corner of block
+		unsigned int uiBlockH = SCALE * a_pblock->GetSourceH();
+		unsigned int uiBlockV = SCALE * a_pblock->GetSourceV();
+
+		ColorR8G8B8A8 *pargba8Block = &a_pargba8Output[uiBlockV*a_uiOutputWidth + uiBlockH];
+		ColorR8G8B8A8 rgba8Gray;
+		rgba8Gray.ucR = 128;
+		rgba8Gray.ucG = 128;
+		rgba8Gray.ucB = 128;
+		rgba8Gray.ucA = 255;
+
+		// outline: top row and left column of the block's output area
+		for (const PixelCoord *pcoord = s_apixelcoordOutline; pcoord->iH >= 0; pcoord++)
+		{
+			ColorR8G8B8A8 *prgba8 = &pargba8Block[pcoord->iV*a_uiOutputWidth + pcoord->iH];
+			*prgba8 = rgba8Gray;
+		}
+
+		const PixelCoord *pacoordMode = nullptr;
+
+		switch (a_pblock->GetEncodingMode())
+		{
+		case Block4x4Encoding::MODE_ETC1:
+
+			// H/V split
+			pacoordMode = a_pblock->GetFlip() ? s_apixelcoordFlip1 : s_apixelcoordFlip0;
+
+			// individual
+			if (a_pblock->IsDifferential() == false)
+			{
+				for (const PixelCoord *pcoord = s_apixelcoordIndividual; pcoord->iH >= 0; pcoord++)
+				{
+					ColorR8G8B8A8 *prgba8 = &pargba8Block[pcoord->iV*a_uiOutputWidth + pcoord->iH];
+					*prgba8 = rgba8Gray;
+				}
+			}
+			else if (a_pblock->GetEncoding()->HasSeverelyBentDifferentialColors())
+			{
+				pacoordMode = a_pblock->GetFlip() ? s_apixelcoordBentDiffFlip1 : s_apixelcoordBentDiffFlip0;
+			}
+			break;
+
+		case Block4x4Encoding::MODE_T:
+			pacoordMode = s_apixelcoordT;
+			break;
+
+		case Block4x4Encoding::MODE_H:
+			pacoordMode = s_apixelcoordH;
+			break;
+
+		case Block4x4Encoding::MODE_PLANAR:
+			pacoordMode = s_apixelcoordPlanar;
+			break;
+
+		default:
+			pacoordMode = s_apixelcoordUnknown;
+			break;
+		}
+
+		// draw mode
+		for (const PixelCoord *pcoord = pacoordMode; pcoord->iH >= 0; pcoord++)
+		{
+			ColorR8G8B8A8 *prgba8 = &pargba8Block[pcoord->iV*a_uiOutputWidth + pcoord->iH];
+			*prgba8 = rgba8Gray;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	float Analysis::ConvertErrorToPSNR(float a_fError, unsigned int a_uiTotalComponents)
+	{
+
+		float fMSE = a_fError / (float)a_uiTotalComponents;
+		float fPSNR = ConvertMSEToPSNR(fMSE);
+
+		return fPSNR;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}
diff --git a/EtcTool/EtcAnalysis.h b/EtcTool/EtcAnalysis.h
new file mode 100644
index 0000000..b814fd9
--- /dev/null
+++ b/EtcTool/EtcAnalysis.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColor.h"
+
+namespace Etc
+{
+
+	class Comparison;
+	class Image;
+	class Block4x4;
+	// Writes the analysis output (log file and PNG renderings) for an image and stores its comparisons.
+	class Analysis
+	{
+	public:
+		// capacity of m_apcomparison; Compare() exits the process once it is reached
+		static const unsigned int MAX_COMPARISONS = 4;
+		// creates a_pstrOutputFolder and writes Analysis.txt, Decoded.png and Modes.png into it
+		Analysis(Image *a_pimage, const char *a_pstrOutputFolder);
+		// creates a Comparison against the reference file a_pstrFilename and stores it
+		void Compare(const char *a_pstrFilename, int a_iPixelX = -1, int a_iPixelY = -1);
+
+		inline const char * GetOutputFolder(void)
+		{
+			return m_pstrOutputFolder;
+		}
+		// writes Decoded.png (1x), or Modes.png (2x with mode overlay) when a_boolDrawModes is set
+		static void DrawImage(Image *a_pimage, const char *a_pstrOutputFolder,
+								bool a_boolDrawModes);
+
+		inline Image * GetImage(void)
+		{
+			return m_pimage;
+		}
+		// draws one block's decoded pixels into an RGBA8 buffer, enlarged by a_uiScale
+		static void DrawBlockPixels(Block4x4 *a_pblock,
+									ColorR8G8B8A8 *a_pargba8Output,
+									unsigned int a_uiOutputWidth,
+									bool a_bGrayscale,
+									unsigned int a_uiScale = 1);
+		// divides a_fError by a_uiTotalComponents and passes the result to ConvertMSEToPSNR
+		static float ConvertErrorToPSNR(float a_fError, unsigned int a_uiTotalComponents);
+
+	private:
+		// overlays the outline and encoding-mode glyph on a block drawn at 2x
+		static void DrawBlockMode2x(Block4x4 *a_pblock,
+									ColorR8G8B8A8 *a_pargba8Output,
+									unsigned int a_uiOutputWidth);
+		// NOTE(review): no destructor is declared, so the Comparison objects created by Compare() are never deleted
+		Image			*m_pimage;
+		const char		*m_pstrOutputFolder;
+		unsigned int	m_uiComparisons;
+		Comparison		*m_apcomparison[MAX_COMPARISONS];
+	};
+
+}
diff --git a/EtcTool/EtcComparison.cpp b/EtcTool/EtcComparison.cpp
new file mode 100644
index 0000000..b94ccb5
--- /dev/null
+++ b/EtcTool/EtcComparison.cpp
@@ -0,0 +1,637 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+#define _CRT_SECURE_NO_WARNINGS (1)
+#endif
+
+#include "EtcConfig.h"
+
+#include "EtcComparison.h"
+
+#include "EtcAnalysis.h"
+#include "EtcTool.h"
+#include "Etc.h"
+#include "EtcFile.h"
+#include "EtcMath.h"
+#include "EtcImage.h"
+#include "EtcBlock4x4.h"
+#include "EtcBlock4x4Encoding_ETC1.h"
+#include "EtcBlock4x4Encoding_RGB8.h"
+#include "EtcBlock4x4Encoding_R11.h"
+#include "EtcBlock4x4Encoding_RG11.h"
+
+#include "lodepng.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include <limits>
+
+namespace Etc
+{
+	const float Comparison::ERROR_EPSILON = 0.000001f;	// block errors closer than this are treated as equal
+
+	// ----------------------------------------------------------------------------------------------------
+	// Loads a_pstrFilename as the reference encoding, then writes its PNGs, the comparison PNG and Comparison.txt.
+	Comparison::Comparison(Analysis *a_panalysisParent, unsigned int a_uiIndex, const char *a_pstrFilename, int a_iPixelX, int a_iPixelY)
+	{
+		// keep a private copy of the filename
+		m_panalysisParent = a_panalysisParent;
+		m_uiIndex = a_uiIndex;
+		m_pstrFilename = new char[strlen(a_pstrFilename)+1];
+		strcpy(m_pstrFilename, a_pstrFilename);
+
+		SetName();
+		// bounded formatting: an over-long path is truncated instead of overrunning the heap buffer
+		const unsigned int uiOutputFolderBytes = 256;
+		m_pstrOutputFolder = new char[uiOutputFolderBytes];
+		snprintf(m_pstrOutputFolder, uiOutputFolderBytes, "%s%cComparison_%s",
+				m_panalysisParent->GetOutputFolder(), ETC_PATH_SLASH, m_pstrName);
+		CreateNewDir(m_pstrOutputFolder);
+
+		// read etc file
+		Etc::File etcfile(a_pstrFilename, Etc::File::Format::INFER_FROM_FILE_EXTENSION);
+
+		etcfile.UseSingleBlock(a_iPixelX, a_iPixelY);
+
+		// construct image with encoding bits
+		m_pimage = new Image(etcfile.GetImageFormat(),
+								etcfile.GetSourceWidth(),
+								etcfile.GetSourceHeight(),
+								etcfile.GetEncodingBits(),
+								etcfile.GetEncodingBitsBytes(),
+								a_panalysisParent->GetImage(),
+								a_panalysisParent->GetImage()->GetErrorMetric());
+
+		if (m_pimage->GetExtendedWidth() != a_panalysisParent->GetImage()->GetExtendedWidth() ||
+			m_pimage->GetExtendedHeight() != a_panalysisParent->GetImage()->GetExtendedHeight())
+		{
+			printf("Error: comparison image (%s) has different width or height\n", m_pstrFilename);
+			exit(1);
+		}
+
+		// scale = 1
+		Analysis::DrawImage(m_pimage, m_pstrOutputFolder, false);
+
+		// scale = 2, with modes
+		Analysis::DrawImage(m_pimage, m_pstrOutputFolder, true);
+
+		DrawImageComparison(m_panalysisParent->GetImage());
+
+		WriteLogFile();
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Writes Comparison.png: the reference image at 2x with a per-block marker comparing both images' errors.
+	void Comparison::DrawImageComparison(Image *a_pimageUnderTest)
+	{
+
+		assert(m_pimage->GetExtendedWidth() == a_pimageUnderTest->GetExtendedWidth() &&
+			m_pimage->GetExtendedHeight() == a_pimageUnderTest->GetExtendedHeight());
+
+		unsigned int uiPngWidth = 2 * m_pimage->GetExtendedWidth();
+		unsigned int uiPngHeight = 2 * m_pimage->GetExtendedHeight();
+		// 4 bytes (RGBA) per output pixel
+		unsigned char* paucPngPixels = new unsigned char[uiPngWidth * uiPngHeight * 4];
+		assert(paucPngPixels);
+
+		ColorR8G8B8A8 *pargba8PngPixels = (ColorR8G8B8A8 *)paucPngPixels;
+
+		for (unsigned int uiBlock = 0; uiBlock < m_pimage->GetNumberOfBlocks(); uiBlock++)
+		{
+			Block4x4 *pblock = &m_pimage->GetBlocks()[uiBlock];
+			Block4x4 *pblockUnderTest = &a_pimageUnderTest->GetBlocks()[uiBlock];
+			// the reference block's pixels are drawn; R11 formats are shown as gray
+			if (a_pimageUnderTest->GetFormat() == Image::Format::R11 || a_pimageUnderTest->GetFormat() == Image::Format::SIGNED_R11)
+			{
+				Analysis::DrawBlockPixels(pblock, pargba8PngPixels, uiPngWidth, true, 2);
+			}
+			else
+			{
+				Analysis::DrawBlockPixels(pblock, pargba8PngPixels, uiPngWidth, false, 2);
+			}
+
+			float fErrorBlockReference = pblock->GetError();
+			float fErrorBlockUnderTest = pblockUnderTest->GetError();
+
+			DrawBlockComparison2x(pblockUnderTest, pargba8PngPixels, uiPngWidth,
+									fErrorBlockUnderTest, fErrorBlockReference);
+		}
+		// snprintf bounds the write: an over-long folder name is truncated instead of overflowing
+		char strFilename[200];
+		snprintf(strFilename, sizeof(strFilename), "%s%cComparison.png", m_pstrOutputFolder, ETC_PATH_SLASH);
+
+		unsigned iResult = lodepng_encode32_file(strFilename, paucPngPixels, uiPngWidth, uiPngHeight);
+		delete[] paucPngPixels;	// previously leaked; the buffer is not needed once the file has been encoded
+		if (iResult != 0)
+		{
+			printf("Error couldn't write comparison image (%s)\n", strFilename);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	void Comparison::DrawBlockComparison2x(Block4x4 *a_pblockUnderTest,
+											ColorR8G8B8A8 *a_pargba8Output,
+											unsigned int a_uiOutputWidth,
+											float a_fErrorBlockUnderTest,
+											float a_fErrorBlockReference)
+	{
+		static const unsigned int SCALE = 2;
+
+		typedef struct
+		{
+			int iH;
+			int iV;
+		} PixelCoord;
+
+		static const PixelCoord s_apixelcoordDifference0[] = {
+			{ 0,0 },{ 1,0 },{ 2,0 },{ 3,0 },{ 4,0 },{ 5,0 },{ 6,0 },{ 7,0 },
+			{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,5 },{ 0,6 },{ 0,7 },
+			{ 1,7 },{ 2,7 },{ 3,7 },{ 4,7 },{ 5,7 },{ 6,7 },{ 7,7 },
+			{ 7,1 },{ 7,2 },{ 7,3 },{ 7,4 },{ 7,5 },{ 7,6 },{ -1,-1 }
+		};
+
+		static const PixelCoord s_apixelcoordDifference5[] = {
+			{ 3,3 },
+			{ 0,0 },{ 1,0 },{ 2,0 },{ 3,0 },{ 4,0 },{ 5,0 },{ 6,0 },{ 7,0 },
+			{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,5 },{ 0,6 },{ 0,7 },
+			{ 1,7 },{ 2,7 },{ 3,7 },{ 4,7 },{ 5,7 },{ 6,7 },{ 7,7 },
+			{ 7,1 },{ 7,2 },{ 7,3 },{ 7,4 },{ 7,5 },{ 7,6 },{ -1,-1 }
+		};
+
+		static const PixelCoord s_apixelcoordDifference10[] = {
+			{ 3,3 },{ 3,4 },{ 4,3 },{ 4,4 },
+			{ 0,0 },{ 1,0 },{ 2,0 },{ 3,0 },{ 4,0 },{ 5,0 },{ 6,0 },{ 7,0 },
+			{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,5 },{ 0,6 },{ 0,7 },
+			{ 1,7 },{ 2,7 },{ 3,7 },{ 4,7 },{ 5,7 },{ 6,7 },{ 7,7 },
+			{ 7,1 },{ 7,2 },{ 7,3 },{ 7,4 },{ 7,5 },{ 7,6 },{ -1,-1 }
+		};
+
+		static const PixelCoord s_apixelcoordDifference20[] = {
+			{ 2,2 },{ 2,3 },{ 3,2 },{ 3,3 },{ 4,4 },{ 4,5 },{ 5,4 },{ 5,5 },
+			{ 0,0 },{ 1,0 },{ 2,0 },{ 3,0 },{ 4,0 },{ 5,0 },{ 6,0 },{ 7,0 },
+			{ 0,1 },{ 0,2 },{ 0,3 },{ 0,4 },{ 0,5 },{ 0,6 },{ 0,7 },
+			{ 1,7 },{ 2,7 },{ 3,7 },{ 4,7 },{ 5,7 },{ 6,7 },{ 7,7 },
+			{ 7,1 },{ 7,2 },{ 7,3 },{ 7,4 },{ 7,5 },{ 7,6 },{ -1,-1 }
+		};
+
+		static const PixelCoord s_apixelcoordCorners[] = {
+			{ 0,0 },{ 7,0 },{ 0,7 },{ 7,7 },{ -1,-1 }
+		};
+
+		// output pixel coord of upper left corner of block
+		unsigned int uiBlockH = SCALE * a_pblockUnderTest->GetSourceH();
+		unsigned int uiBlockV = SCALE * a_pblockUnderTest->GetSourceV();
+
+		ColorR8G8B8A8 *pargba8Block = &a_pargba8Output[uiBlockV*a_uiOutputWidth + uiBlockH];
+
+		float fRelativeError = 0.0f;
+		if (a_fErrorBlockUnderTest != a_fErrorBlockReference)
+		{
+			fRelativeError = fabs((a_fErrorBlockUnderTest - a_fErrorBlockReference) /
+								(a_fErrorBlockUnderTest + a_fErrorBlockReference));
+
+			if (fabsf(a_fErrorBlockUnderTest - a_fErrorBlockReference) < ERROR_EPSILON)
+			{
+				fRelativeError = 0.0f;
+			}
+		}
+
+		ColorR8G8B8A8 rgb8Draw;
+		rgb8Draw.ucA = 255;
+
+		// equal
+		if (fRelativeError == 0.0f)
+		{
+			rgb8Draw.ucR = 128;
+			rgb8Draw.ucG = 128;
+			rgb8Draw.ucB = 128;
+		}
+		// better tthan reference
+		else if (a_fErrorBlockUnderTest < a_fErrorBlockReference)
+		{
+			rgb8Draw.ucR = 0;
+			rgb8Draw.ucG = 255;
+			rgb8Draw.ucB = 0;
+		}
+		// worse than reference
+		else if (a_fErrorBlockUnderTest > a_fErrorBlockReference)
+		{
+			rgb8Draw.ucR = 255;
+			rgb8Draw.ucG = 0;
+			rgb8Draw.ucB = 0;
+		}
+
+		const PixelCoord *papixelcoordDraw = s_apixelcoordCorners;
+
+		// if 20% worse
+		if (fRelativeError >= 1.44f)
+		{
+			papixelcoordDraw = s_apixelcoordDifference20;
+		}
+		// if 10% worse
+		else if (fRelativeError >= 1.21f)
+		{
+			papixelcoordDraw = s_apixelcoordDifference10;
+		}
+		// if 5% worse
+		else if (fRelativeError >= 1.1025f)
+		{
+			papixelcoordDraw = s_apixelcoordDifference5;
+		}
+		else if (fRelativeError > 0.0f)
+		{
+			papixelcoordDraw = s_apixelcoordDifference0;
+		}
+
+		// outline
+		for (const PixelCoord *pcoord = papixelcoordDraw; pcoord->iH >= 0; pcoord++)
+		{
+			ColorR8G8B8A8 *prgba8 = &pargba8Block[pcoord->iV*a_uiOutputWidth + pcoord->iH];
+
+			*prgba8 = rgb8Draw;
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Sets m_pstrName to the name of the folder that contains m_pstrFilename, or to m_uiIndex if there is none.
+	void Comparison::SetName(void)
+	{
+		// alloc memory for name (at least 4 bytes, presumably so the index fallback below fits)
+		unsigned int uiNameBytes = (unsigned int)strlen(m_pstrFilename) + 1;
+		if (uiNameBytes < 4)
+		{
+			uiNameBytes = 4;
+		}
+		m_pstrName = new char[uiNameBytes];
+		m_pstrName[0] = 0;
+
+		// first, try to find folder name of comparison image
+		{
+			char *pstr = new char[strlen(m_pstrFilename) + 1];
+			strcpy(pstr, m_pstrFilename);
+
+			int iLastSlash;
+			int iPenultimateSlash;
+
+			// find last slash
+			for (iLastSlash = (int)strlen(pstr) - 1; iLastSlash >= 0; iLastSlash--)
+			{
+				if (pstr[iLastSlash] == ETC_PATH_SLASH)
+				{
+					break;
+				}
+			}
+
+			// find penultimate slash
+			for (iPenultimateSlash = iLastSlash - 1; iPenultimateSlash >= 0; iPenultimateSlash--)
+			{
+				if (pstr[iPenultimateSlash] == ETC_PATH_SLASH)
+				{
+					break;
+				}
+			}
+			// iLastSlash > 0 guarantees iPenultimateSlash >= -1, so the index used below is never negative
+			if (iLastSlash > 0)
+			{
+				pstr[iLastSlash] = 0;
+				strcpy(m_pstrName, &pstr[iPenultimateSlash + 1]);
+				assert(strlen(m_pstrName) > 0);
+			}
+
+			delete[] pstr;
+		}
+
+		// otherwise use index as name
+		if (m_pstrName[0] == 0)
+		{
+			snprintf(m_pstrName, uiNameBytes, "%u", m_uiIndex);
+		}
+
+	}
+		
+	// ----------------------------------------------------------------------------------------------------
+	// Writes Comparison.txt: PSNR of the parent's image, PSNR of the reference image, then the better/worse block lists.
+	void Comparison::WriteLogFile(void)
+	{
+
+		char strPath[200];
+		snprintf(strPath, sizeof(strPath), "%s%cComparison.txt", m_pstrOutputFolder, ETC_PATH_SLASH);
+		// snprintf bounds the write: an over-long folder name is truncated instead of overrunning the buffer
+		FILE *pfile = fopen(strPath, "wt");
+		if (pfile == nullptr)
+		{
+			printf("Error couldn't open comparison log file (%s)\n", strPath);
+			exit(1);
+		}
+
+		float fImageError = m_panalysisParent->GetImage()->GetError();
+		unsigned int uiImagePixels = m_panalysisParent->GetImage()->GetSourceWidth() * 
+										m_panalysisParent->GetImage()->GetSourceHeight();
+		// first section: the parent's image; the component count follows the reference image's format
+		if (m_pimage->GetFormat() == Image::Format::R11 || m_pimage->GetFormat() == Image::Format::SIGNED_R11)
+		{
+			fprintf(pfile, "PSNR(r) = %.4f\n", Analysis::ConvertErrorToPSNR(fImageError, 1 * uiImagePixels));
+			fprintf(pfile, "\n");
+		}
+		else if (m_pimage->GetFormat() == Image::Format::RG11 || m_pimage->GetFormat() == Image::Format::SIGNED_RG11)
+		{
+			fprintf(pfile, "PSNR(rg) = %.4f\n", Analysis::ConvertErrorToPSNR(fImageError, 2 * uiImagePixels));
+			fprintf(pfile, "\n");
+		}
+		else
+		{
+			fprintf(pfile, "PSNR(rgb) = %.4f\n", Analysis::ConvertErrorToPSNR(fImageError, 3 * uiImagePixels));
+			fprintf(pfile, "PSNR(rgba) = %.4f\n", Analysis::ConvertErrorToPSNR(fImageError, 4 * uiImagePixels));
+			fprintf(pfile, "\n");
+		}
+		float fReferenceImageError = m_pimage->GetError();
+		// second section: the reference image loaded from m_pstrFilename
+		fprintf(pfile, "reference image %s\n", m_pstrFilename);
+		if (m_pimage->GetFormat() == Image::Format::R11 || m_pimage->GetFormat() == Image::Format::SIGNED_R11)
+		{
+			fprintf(pfile, "reference PSNR(r) = %.4f\n",
+				Analysis::ConvertErrorToPSNR(fReferenceImageError, 1 * uiImagePixels));
+		}
+		else if (m_pimage->GetFormat() == Image::Format::RG11 || m_pimage->GetFormat() == Image::Format::SIGNED_RG11)
+		{
+			fprintf(pfile, "reference PSNR(rg) = %.4f\n",
+				Analysis::ConvertErrorToPSNR(fReferenceImageError, 2 * uiImagePixels));
+		}
+		else
+		{
+			fprintf(pfile, "reference PSNR(rgb) = %.4f\n",
+				Analysis::ConvertErrorToPSNR(fReferenceImageError, 3 * uiImagePixels));
+			fprintf(pfile, "reference PSNR(rgba) = %.4f\n",
+				Analysis::ConvertErrorToPSNR(fReferenceImageError, 4 * uiImagePixels));
+		}
+		WriteBetterOrWorseBlocks(pfile, false);
+		WriteBetterOrWorseBlocks(pfile, true);
+
+		fclose(pfile);
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+	void Comparison::WriteBetterOrWorseBlocks(FILE *a_pfile, bool a_boolWorse)
+	{
+
+		fprintf(a_pfile, "\n");
+		if (a_boolWorse)
+		{
+			fprintf(a_pfile, "Worse Blocks\n");
+		}
+		else
+		{
+			fprintf(a_pfile, "Better Blocks\n");
+		}
+
+		for (unsigned int uiBlock = 0; uiBlock < m_pimage->GetNumberOfBlocks(); uiBlock++)
+		{
+			Block4x4 *pblockReference = &m_pimage->GetBlocks()[uiBlock];
+			Block4x4 *pblockUnderTest = &m_panalysisParent->GetImage()->GetBlocks()[uiBlock];
+
+			float fErrorBlockReference = pblockReference->GetError();
+			float fErrorBlockUnderTest = pblockUnderTest->GetError();
+
+			if (fabsf(fErrorBlockUnderTest - fErrorBlockReference) >= ERROR_EPSILON)
+			{
+				if ((a_boolWorse && (fErrorBlockUnderTest > fErrorBlockReference)) ||
+					(!a_boolWorse && (fErrorBlockUnderTest < fErrorBlockReference)))
+				{
+					fprintf(a_pfile, "HV=%u,%u\n", 
+								pblockUnderTest->GetSourceH(), pblockUnderTest->GetSourceV());
+
+					WriteBlockInfo(a_pfile, pblockUnderTest, "bsi");
+					WriteBlockInfo(a_pfile, pblockReference, "ref");
+
+				}
+			}
+
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Writes a description of a_pblock (PSNR, encoding mode, colors/bases, selectors) to a_pfile, labelled a_pstrEncoder.
+	void Comparison::WriteBlockInfo(FILE *a_pfile, Block4x4 *a_pblock, const char *a_pstrEncoder)
+	{
+
+		Block4x4Encoding *pencoding = a_pblock->GetEncoding();
+
+		Block4x4Encoding_R11 *pencoding_r11 = static_cast<Block4x4Encoding_R11 *>(pencoding);
+		Block4x4Encoding_RG11 *pencoding_rg11 = static_cast<Block4x4Encoding_RG11 *>(pencoding);
+		Block4x4Encoding_RGB8 *pencoding_rgb8 = static_cast<Block4x4Encoding_RGB8 *>(pencoding);
+		// NOTE(review): unchecked casts of one pointer; each is only meaningful when the encoding really is that type
+		fprintf(a_pfile, "    %s:", a_pstrEncoder);
+
+		float fPSNR = 0.0f;
+		if (m_pimage->GetFormat() == Image::Format::R11 || m_pimage->GetFormat() == Image::Format::SIGNED_R11)
+		{
+			fPSNR = Analysis::ConvertErrorToPSNR(a_pblock->GetError(), 1 * 16);
+		}
+		else if (m_pimage->GetFormat() == Image::Format::RG11 || m_pimage->GetFormat() == Image::Format::SIGNED_RG11)
+		{
+			fPSNR = Analysis::ConvertErrorToPSNR(a_pblock->GetError(), 2 * 16);
+		}
+		else
+		{
+			fPSNR = Analysis::ConvertErrorToPSNR(a_pblock->GetError(), 3 * 16);
+		}
+		if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_RG11)
+		{
+			fprintf(a_pfile, "psnr=%6.3f\n", fPSNR);
+		}
+		else
+		{
+			fprintf(a_pfile, "psnr=%6.3f ", fPSNR);
+		}
+		bool boolWriteSelectors = false;
+		// NOTE(review): RGBA8 alpha is read through the R11 cast - confirm the RGBA8 encoding type supports this
+		if (m_pimage->GetFormat() == Image::Format::RGBA8)
+		{
+			fprintf(a_pfile, "RGBA8 ");
+			fprintf(a_pfile, "alpha base(%4.0f) ", pencoding_r11->GetRedBase());
+			fprintf(a_pfile, "alpha multiplier(%3.0f) ", pencoding_r11->GetRedMultiplier());
+			fprintf(a_pfile, "alpha table index(%2d) ", pencoding_r11->GetRedTableIndex());
+		}
+		if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_ETC1)
+		{
+			fprintf(a_pfile, "ETC1(%c,%c) ", 
+						a_pblock->IsDifferential() ? 'D' : 'I',
+						a_pblock->GetFlip() ? 'V' : 'H' );
+
+			if (a_pblock->IsDifferential())
+			{
+				fprintf(a_pfile, "color1(%2d,%2d,%2d) ",
+					pencoding_rgb8->GetColor1().IntRed(31.0f),
+					pencoding_rgb8->GetColor1().IntGreen(31.0f),
+					pencoding_rgb8->GetColor1().IntBlue(31.0f) );
+				fprintf(a_pfile, "color2(%2d,%2d,%2d) ",
+					pencoding_rgb8->GetColor2().IntRed(31.0f),
+					pencoding_rgb8->GetColor2().IntGreen(31.0f),
+					pencoding_rgb8->GetColor2().IntBlue(31.0f));
+			}
+			else
+			{
+				fprintf(a_pfile, "color1(%2d,%2d,%2d) ",
+					pencoding_rgb8->GetColor1().IntRed(15.0f),
+					pencoding_rgb8->GetColor1().IntGreen(15.0f),
+					pencoding_rgb8->GetColor1().IntBlue(15.0f));
+				fprintf(a_pfile, "color2(%2d,%2d,%2d) ",
+					pencoding_rgb8->GetColor2().IntRed(15.0f),
+					pencoding_rgb8->GetColor2().IntGreen(15.0f),
+					pencoding_rgb8->GetColor2().IntBlue(15.0f));
+			}
+
+			fprintf(a_pfile, "cw1(%d) cw2(%d) ", pencoding_rgb8->GetCW1(), pencoding_rgb8->GetCW2());
+
+			boolWriteSelectors = true;
+		}
+		else if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_T ||
+			a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_H)
+		{
+			fprintf(a_pfile, "%-9s ", a_pblock->GetEncodingModeName());
+
+			fprintf(a_pfile, "color1(%2d,%2d,%2d) ",
+				pencoding_rgb8->GetColor1().IntRed(15.0f),
+				pencoding_rgb8->GetColor1().IntGreen(15.0f),
+				pencoding_rgb8->GetColor1().IntBlue(15.0f));
+			fprintf(a_pfile, "color2(%2d,%2d,%2d) ",
+				pencoding_rgb8->GetColor2().IntRed(15.0f),
+				pencoding_rgb8->GetColor2().IntGreen(15.0f),
+				pencoding_rgb8->GetColor2().IntBlue(15.0f));
+
+			fprintf(a_pfile, "cw1(%d)        ", pencoding_rgb8->GetCW1() );
+
+			boolWriteSelectors = true;
+		}
+		else if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_PLANAR)
+		{
+			fprintf(a_pfile, "%-9s ", a_pblock->GetEncodingModeName());
+
+			fprintf(a_pfile, "color1(%2d,%2d,%2d) ",
+				pencoding_rgb8->GetColor1().IntRed(63.0f),
+				pencoding_rgb8->GetColor1().IntGreen(127.0f),
+				pencoding_rgb8->GetColor1().IntBlue(63.0f));
+			fprintf(a_pfile, "color2(%2d,%2d,%2d) ",
+				pencoding_rgb8->GetColor2().IntRed(63.0f),
+				pencoding_rgb8->GetColor2().IntGreen(127.0f),
+				pencoding_rgb8->GetColor2().IntBlue(63.0f));
+			fprintf(a_pfile, "color3(%2d,%2d,%2d) ",
+				pencoding_rgb8->GetColor3().IntRed(63.0f),
+				pencoding_rgb8->GetColor3().IntGreen(127.0f),
+				pencoding_rgb8->GetColor3().IntBlue(63.0f));
+		}
+		else if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_R11)
+		{
+			fprintf(a_pfile, "R11 ");
+			fprintf(a_pfile, "base(%4.0f) ", pencoding_r11->GetRedBase());
+			fprintf(a_pfile, "multiplier(%3.0f) ", pencoding_r11->GetRedMultiplier());
+			fprintf(a_pfile, "table index(%2d) ", pencoding_r11->GetRedTableIndex());
+
+			boolWriteSelectors = true;
+		}
+		else if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_RG11)
+		{
+			fprintf(a_pfile, "RG11 image: %s\n", a_pstrEncoder);
+			fprintf(a_pfile, "red base(%4.0f)\n", pencoding_rg11->GetRedBase());
+			fprintf(a_pfile, "grn base(%4.0f)\n\n", pencoding_rg11->GetGrnBase());
+
+			fprintf(a_pfile, "red multiplier(%3.0f)\n", pencoding_rg11->GetRedMultiplier());
+			fprintf(a_pfile, "grn multiplier(%3.0f)\n\n", pencoding_rg11->GetGrnMultiplier());
+
+			fprintf(a_pfile, "red table index(%2d)\n", pencoding_rg11->GetRedTableIndex());
+			fprintf(a_pfile, "grn table index(%2d)\n\n", pencoding_rg11->GetGrnTableIndex());
+
+			boolWriteSelectors = true;
+		}
+		else
+		{
+			assert(0);
+		}
+
+		if (boolWriteSelectors)
+		{
+			size_t selectorStringSize = Block4x4Encoding::PIXELS*4;
+			char *redSelectors = new char[selectorStringSize];
+			char *grnSelectors = new char[selectorStringSize];
+			memset(&redSelectors[0], 0, selectorStringSize);
+			memset(&grnSelectors[0], 0, selectorStringSize);
+
+			if (a_pblock->GetEncodingMode() != Block4x4Encoding::MODE_RG11)
+			{
+				fprintf(a_pfile, "selectors(");
+			}
+
+			for (unsigned int uiPixel = 0; uiPixel < Block4x4Encoding::PIXELS; uiPixel++)
+			{
+				if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_R11)
+				{
+					fprintf(a_pfile, "%u", pencoding_r11->GetRedSelectors()[uiPixel]);
+				}
+				else if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_RG11)
+				{
+					char temp[2];
+					snprintf(temp, sizeof(temp), "%d", pencoding_rg11->GetRedSelectors()[uiPixel]);
+					strcat(redSelectors,temp);
+					snprintf(temp, sizeof(temp), "%d", pencoding_rg11->GetGrnSelectors()[uiPixel]);
+					strcat(grnSelectors, temp);
+
+					strcat(redSelectors, ",\0");
+					strcat(grnSelectors, ",\0");
+
+					fprintf(a_pfile, "pixel[%d]:(%f,%f,%f)\n", uiPixel,
+						pencoding->GetDecodedColors()[uiPixel].fR,
+						pencoding->GetDecodedColors()[uiPixel].fG,
+						pencoding->GetDecodedColors()[uiPixel].fB);
+
+				}
+				else
+				{
+					fprintf(a_pfile, "%u", pencoding_rgb8->GetSelectors()[uiPixel]);
+				}
+			}
+			if (a_pblock->GetEncodingMode() == Block4x4Encoding::MODE_RG11)
+			{
+				redSelectors[selectorStringSize - 1] = '\0';
+				grnSelectors[selectorStringSize - 1] = '\0';
+				fprintf(a_pfile, "selectors(red: %s)\n", redSelectors);
+				fprintf(a_pfile, "selectors(grn: %s)\n,", grnSelectors);	// NOTE(review): the ',' after '\n' looks unintended - confirm
+			}
+			else
+			{
+				fprintf(a_pfile, ")");
+			}
+
+			// the scratch strings are allocated for every mode, so they are released for every mode (was RG11 only)
+			delete[] redSelectors;
+			delete[] grnSelectors;
+		}
+
+		fprintf(a_pfile, "\n");
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+}
diff --git a/EtcTool/EtcComparison.h b/EtcTool/EtcComparison.h
new file mode 100644
index 0000000..52b39f0
--- /dev/null
+++ b/EtcTool/EtcComparison.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColor.h"
+#include <stdio.h>
+
+namespace Etc
+{
+	class Analysis;
+	class Image;
+	class Block4x4;
+
+	class Comparison
+	{
+	public:
+		// NOTE(review): a_iPixelX / a_iPixelY default to -1, presumably meaning "no specific pixel" — confirm in EtcComparison.cpp.
+		Comparison(Analysis *a_panalysisParent, unsigned int a_uiIndex, const char *a_pstrFilename, int a_iPixelX = -1, int a_iPixelY = -1);
+
+	private:
+		// Declared here only; the value has to be supplied by an out-of-class definition.
+		static const float ERROR_EPSILON;
+
+		void SetName(void);
+
+		void DrawImageComparison(Image *a_pimageUnderTest);
+
+		void DrawBlockComparison2x(Block4x4 *a_pblockUnderTest,
+									ColorR8G8B8A8 *a_pargba8Output,
+									unsigned int a_uiOutputWidth,
+									float a_fErrorBlockUnderTest,
+									float a_fErrorBlockReference);
+		// Log-writing helpers. WriteBlockInfo prints one block's details (e.g. its selectors) to a_pfile.
+		void WriteLogFile(void);
+		void WriteBetterOrWorseBlocks(FILE *a_pfile, bool a_boolWorse);
+		void WriteBlockInfo(FILE *a_pfile, Block4x4 *a_pblock, const char *a_pstrEncoder);
+		// NOTE(review): ownership of the pointer members below is not visible in this header — see EtcComparison.cpp.
+		Analysis		*m_panalysisParent;
+		unsigned int	m_uiIndex;
+		char			*m_pstrFilename;
+		Image			*m_pimage;
+		char			*m_pstrName;
+		char			*m_pstrOutputFolder;
+	};
+
+
+}
diff --git a/EtcTool/EtcFile.cpp b/EtcTool/EtcFile.cpp
new file mode 100644
index 0000000..f57a0b9
--- /dev/null
+++ b/EtcTool/EtcFile.cpp
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+#define _CRT_SECURE_NO_WARNINGS (1)
+#endif
+
+#include "EtcConfig.h"
+
+
+#include "EtcFile.h"
+
+#include "EtcFileHeader.h"
+#include "EtcColor.h"
+#include "Etc.h"
+#include "EtcBlock4x4EncodingBits.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+using namespace Etc;
+
+// ----------------------------------------------------------------------------------------------------
+// Wrap encoding bits that already exist in memory so they can be saved with Write().
+// The File takes ownership of a_paucEncodingBits: ~File() releases it with delete[].
+File::File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
+			unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes,
+			unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+			unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight)
+{
+	// ~File() always delete[]s m_pstrFilename, so it has to be heap storage even when no
+	// name was supplied; pointing it at a string literal would make the destructor undefined.
+	const char *pstrSource = (a_pstrFilename != nullptr) ? a_pstrFilename : "";
+	m_pstrFilename = new char[strlen(pstrSource) + 1];
+	strcpy(m_pstrFilename, pstrSource);
+
+	m_fileformat = a_fileformat;
+	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
+	{
+		// ***** TODO: add this later *****
+		m_fileformat = Format::KTX;
+	}
+
+	m_imageformat = a_imageformat;
+	m_paucEncodingBits = a_paucEncodingBits;
+	m_uiEncodingBitsBytes = a_uiEncodingBitsBytes;
+	m_uiSourceWidth = a_uiSourceWidth;
+	m_uiSourceHeight = a_uiSourceHeight;
+	m_uiExtendedWidth = a_uiExtendedWidth;
+	m_uiExtendedHeight = a_uiExtendedHeight;
+
+	// The header constructors query this object (image format, dimensions), so they must
+	// run only after the members above have been assigned.
+	switch (m_fileformat)
+	{
+	case Format::PKM:
+		m_pheader = new FileHeader_Pkm(this);
+		break;
+
+	case Format::KTX:
+		m_pheader = new FileHeader_Ktx(this);
+		break;
+
+	default:
+		// Keep m_pheader well defined for ~File() in builds where assert() compiles away.
+		m_pheader = nullptr;
+		assert(0);
+		break;
+	}
+
+}
+
+
+// ----------------------------------------------------------------------------------------------------
+// Load an existing KTX file: header, key/value data (skipped), image size, then the encoding bits.
+// A null filename yields an empty, safely destructible File; open/read failures print an error and exit(1).
+File::File(const char *a_pstrFilename, Format a_fileformat)
+{
+	// Define every member up front: ~File() deletes the pointers and FileHeader_Ktx() reads the rest.
+	m_pstrFilename = nullptr;
+	m_fileformat = a_fileformat;
+	m_imageformat = Image::Format::UNKNOWN;
+	m_pheader = nullptr;
+	m_paucEncodingBits = nullptr;
+	m_uiEncodingBitsBytes = 0;
+	m_uiSourceWidth = m_uiSourceHeight = 0;
+	m_uiExtendedWidth = m_uiExtendedHeight = 0;
+
+	if (a_pstrFilename == nullptr)
+		return;
+
+	m_pstrFilename = new char[strlen(a_pstrFilename) + 1];
+	strcpy(m_pstrFilename, a_pstrFilename);
+
+	if (m_fileformat == Format::INFER_FROM_FILE_EXTENSION)
+	{
+		// ***** TODO: add this later *****
+		m_fileformat = Format::KTX;
+	}
+
+	FILE *pfile = fopen(m_pstrFilename, "rb");
+	if (pfile == nullptr)
+	{
+		printf("ERROR: Couldn't open %s", m_pstrFilename);
+		exit(1);
+	}
+	fseek(pfile, 0, SEEK_END);
+	const long lFileSize = ftell(pfile);
+	fseek(pfile, 0, SEEK_SET);
+
+	FileHeader_Ktx *pheaderKtx = new FileHeader_Ktx(this);	// NOTE(review): KTX is assumed even if a_fileformat is PKM
+	m_pheader = pheaderKtx;
+	FileHeader_Ktx::Data *pdata = pheaderKtx->GetData();
+
+	// Checked at run time rather than with assert() so corrupt files are also rejected in release builds.
+	bool boolReadOk = (lFileSize >= 0);
+	boolReadOk = boolReadOk && fread(pdata, 1, sizeof(*pdata), pfile) == sizeof(*pdata);
+	if (boolReadOk && pdata->m_u32BytesOfKeyValueData > 0)
+		boolReadOk = (fseek(pfile, (long)pdata->m_u32BytesOfKeyValueData, SEEK_CUR) == 0);
+	boolReadOk = boolReadOk && fread(&m_uiEncodingBitsBytes, 1, sizeof(m_uiEncodingBitsBytes), pfile) == sizeof(m_uiEncodingBitsBytes);
+
+	// Validate the stored size against what is left in the file BEFORE allocating from it.
+	const long lDataStart = boolReadOk ? ftell(pfile) : -1;
+	boolReadOk = boolReadOk && lDataStart >= 0 && lDataStart <= lFileSize &&
+					m_uiEncodingBitsBytes <= (unsigned long)(lFileSize - lDataStart);
+	if (boolReadOk)
+	{
+		m_paucEncodingBits = new unsigned char[m_uiEncodingBitsBytes];
+		boolReadOk = (fread(m_paucEncodingBits, 1, m_uiEncodingBitsBytes, pfile) == m_uiEncodingBitsBytes);
+	}
+	fclose(pfile);
+	if (!boolReadOk)
+	{
+		printf("ERROR: %s is not a readable KTX file\n", m_pstrFilename);
+		exit(1);
+	}
+
+	// The (internal format, base internal format) pairs recognised, and the image format each denotes.
+	typedef FileHeader_Ktx::InternalFormat InternalFormat;
+	typedef FileHeader_Ktx::BaseInternalFormat BaseInternalFormat;
+	static const struct { InternalFormat internalformat; BaseInternalFormat baseinternalformat; Image::Format imageformat; } s_aformatmap[] =
+	{
+		{ InternalFormat::ETC1_RGB8, BaseInternalFormat::ETC1_RGB8, Image::Format::ETC1 },
+		{ InternalFormat::ETC2_RGB8, BaseInternalFormat::ETC2_RGB8, Image::Format::RGB8 },
+		{ InternalFormat::ETC2_RGB8A1, BaseInternalFormat::ETC2_RGB8A1, Image::Format::RGB8A1 },
+		{ InternalFormat::ETC2_RGBA8, BaseInternalFormat::ETC2_RGBA8, Image::Format::RGBA8 },
+		{ InternalFormat::ETC2_R11, BaseInternalFormat::ETC2_R11, Image::Format::R11 },
+		{ InternalFormat::ETC2_SIGNED_R11, BaseInternalFormat::ETC2_R11, Image::Format::SIGNED_R11 },
+		{ InternalFormat::ETC2_RG11, BaseInternalFormat::ETC2_RG11, Image::Format::RG11 },
+		{ InternalFormat::ETC2_SIGNED_RG11, BaseInternalFormat::ETC2_RG11, Image::Format::SIGNED_RG11 },
+	};
+	for (const auto &entry : s_aformatmap)
+	{
+		if (pdata->m_u32GlInternalFormat == (uint32_t)entry.internalformat &&
+			pdata->m_u32GlBaseInternalFormat == (uint32_t)entry.baseinternalformat)
+		{
+			m_imageformat = entry.imageformat;
+			break;
+		}
+	}
+
+	m_uiSourceWidth = pdata->m_u32PixelWidth;
+	m_uiSourceHeight = pdata->m_u32PixelHeight;
+	m_uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth);
+	m_uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight);
+
+	unsigned int uiBlocks = m_uiExtendedWidth * m_uiExtendedHeight / 16;
+	Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat);
+	unsigned int expectedbytes = uiBlocks * Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat);
+	assert(expectedbytes == m_uiEncodingBitsBytes);
+}
+
+File::~File()
+{
+	// Releases everything the File owns: the encoding bits and filename arrays and
+	// the header object. delete / delete[] accept a null pointer, so the pointers
+	// need no individual null checks before being released.
+
+	// encoding bits (array allocation)
+	delete[] m_paucEncodingBits;
+	m_paucEncodingBits = nullptr;
+
+	// filename copy (array allocation)
+	delete[] m_pstrFilename;
+	m_pstrFilename = nullptr;
+
+	// header (single object, deleted through the FileHeader base pointer)
+	delete m_pheader;
+	m_pheader = nullptr;
+	// NOTE(review): this relies on each pointer being either null or the exact address
+	// returned by the matching new / new[] — confirm for every code path that sets them.
+}
+
+void File::UseSingleBlock(int a_iPixelX, int a_iPixelY)
+{
+	// Narrows this File to the one 4x4 block that contains pixel (a_iPixelX, a_iPixelY):
+	// the source/extended dimensions become 4x4 and that block's encoding bits are copied
+	// to the front of the buffer. A negative coordinate (the default, -1) leaves the File untouched.
+	if (a_iPixelX <= -1 || a_iPixelY <= -1)
+		return;
+
+	// Valid pixel coordinates are 0 .. dimension-1, so a coordinate equal to the dimension
+	// is out of range as well (hence >=, not >).
+	if (a_iPixelX >= (int)m_uiSourceWidth)
+	{
+		//if we are using a ktx thats the size of a single block or less
+		//then make sure we use the 4x4 image as the single block
+		if (m_uiSourceWidth <= 4)
+		{
+			a_iPixelX = 0;
+		}
+		else
+		{
+			printf("blockAtHV: H coordinate out of range, capped to image width\n");
+			a_iPixelX = m_uiSourceWidth - 1;
+		}
+	}
+	if (a_iPixelY >= (int)m_uiSourceHeight)
+	{
+		//if we are using a ktx thats the size of a single block or less
+		//then make sure we use the 4x4 image as the single block
+		if (m_uiSourceHeight <= 4)
+		{
+			a_iPixelY = 0;
+		}
+		else
+		{
+			printf("blockAtHV: V coordinate out of range, capped to image height\n");
+			a_iPixelY = m_uiSourceHeight - 1;
+		}
+	}
+
+	const unsigned int uiOrigWidth = m_uiSourceWidth;
+
+	m_uiSourceWidth = 4;
+	m_uiSourceHeight = 4;
+	m_uiExtendedWidth = Image::CalcExtendedDimension((unsigned short)m_uiSourceWidth);
+	m_uiExtendedHeight = Image::CalcExtendedDimension((unsigned short)m_uiSourceHeight);
+
+	Block4x4EncodingBits::Format encodingbitsformat = Image::DetermineEncodingBitsFormat(m_imageformat);
+	unsigned int uiEncodingBitsBytesPerBlock = Block4x4EncodingBits::GetBytesPerBlock(encodingbitsformat);
+
+	// Blocks are stored row by row. A partial block at the right edge still occupies a slot,
+	// so the row length is rounded up. The index is computed directly instead of walking the
+	// buffer, which also covers images whose dimensions are not multiples of 4.
+	// NOTE(review): assumes the buffer holds exactly one mip level in this row-major order.
+	const unsigned int uiBlocksPerRow = (uiOrigWidth + 3) / 4;
+	const unsigned int uiBlockColumn = (unsigned int)a_iPixelX / 4;
+	const unsigned int uiBlockRow = (unsigned int)a_iPixelY / 4;
+	const size_t szOffset = ((size_t)uiBlockRow * uiBlocksPerRow + uiBlockColumn) * uiEncodingBitsBytesPerBlock;
+
+	if (m_paucEncodingBits == nullptr || szOffset == 0)
+	{
+		return;		// nothing loaded, or the wanted block is already first
+	}
+	if (szOffset + uiEncodingBitsBytesPerBlock > m_uiEncodingBitsBytes)
+	{
+		printf("blockAtHV: block lies outside the encoding bits, using the first block\n");
+		return;
+	}
+
+	// Copy the block to the front rather than advancing m_paucEncodingBits: ~File() hands that
+	// pointer to delete[], which is only valid for the address originally returned by new[].
+	memmove(m_paucEncodingBits, m_paucEncodingBits + szOffset, uiEncodingBitsBytesPerBlock);
+
+	// m_uiEncodingBitsBytes is deliberately left as it was; only the first
+	// uiEncodingBitsBytesPerBlock bytes of the buffer now describe the 4x4 image, and
+	// GetEncodingBits() keeps returning the start of the allocation.
+}
+// ----------------------------------------------------------------------------------------------------
+// Write the header followed by the encoding bits to m_pstrFilename; prints an error and exit(1)s on failure.
+void File::Write()
+{
+
+	FILE *pfile = fopen(m_pstrFilename, "wb");
+	if (pfile == nullptr)
+	{
+		printf("Error: couldn't open Etc file (%s)\n", m_pstrFilename);
+		exit(1);
+	}
+
+	m_pheader->Write(pfile);
+	// fwrite returns size_t; keep that width instead of narrowing the count through int.
+	const size_t szWritten = fwrite(m_paucEncodingBits, 1, m_uiEncodingBitsBytes, pfile);
+	// Buffered data may only reach the disk at fclose(), so its result is part of the write check.
+	const bool boolClosed = (fclose(pfile) == 0);
+	if (szWritten != m_uiEncodingBitsBytes || !boolClosed)
+	{
+		printf("Error: couldn't write Etc file (%s)\n", m_pstrFilename);
+		exit(1);
+	}
+}
+
+// ----------------------------------------------------------------------------------------------------
+//
+
diff --git a/EtcTool/EtcFile.h b/EtcTool/EtcFile.h
new file mode 100644
index 0000000..d55b050
--- /dev/null
+++ b/EtcTool/EtcFile.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+#include "EtcImage.h"
+
+namespace Etc
+{
+	class FileHeader;
+	class SourceImage;
+
+	class File
+	{
+	public:
+		// Container formats. Both constructors currently turn INFER_FROM_FILE_EXTENSION into KTX.
+		enum class Format
+		{
+			INFER_FROM_FILE_EXTENSION,
+			PKM,
+			KTX,
+		};
+		// Wraps encoding bits that are already in memory; ~File() later delete[]s a_paucEncodingBits.
+		File(const char *a_pstrFilename, Format a_fileformat, Image::Format a_imageformat,
+				unsigned char *a_paucEncodingBits, unsigned int a_uiEncodingBitsBytes,
+				unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight,
+				unsigned int a_uiExtendedWidth, unsigned int a_uiExtendedHeight);
+		// Loads the header and encoding bits from an existing file on disk.
+		File(const char *a_pstrFilename, Format a_fileformat);
+		~File();
+		const char *GetFilename(void) { return m_pstrFilename; }
+		// NOTE(review): Read() is declared here but EtcFile.cpp does not define it — confirm it is needed.
+		void Read(const char *a_pstrFilename);
+		void Write(void);
+
+		inline unsigned int GetSourceWidth(void)
+		{
+			return m_uiSourceWidth;
+		}
+
+		inline unsigned int GetSourceHeight(void)
+		{
+			return m_uiSourceHeight;
+		}
+
+		inline unsigned int GetExtendedWidth(void)
+		{
+			return m_uiExtendedWidth;
+		}
+
+		inline unsigned int GetExtendedHeight(void)
+		{
+			return m_uiExtendedHeight;
+		}
+
+		inline Image::Format GetImageFormat()
+		{
+			return m_imageformat;
+		}
+
+		inline unsigned int GetEncodingBitsBytes()
+		{
+			return m_uiEncodingBitsBytes;
+		}
+
+		inline unsigned char * GetEncodingBits()
+		{
+			return m_paucEncodingBits;
+		}
+		void UseSingleBlock(int a_iPixelX = -1, int a_iPixelY = -1);	// reduce the File to the 4x4 block holding the pixel; no-op for negative coordinates
+	private:
+		// m_pstrFilename, m_pheader and m_paucEncodingBits are released by ~File().
+		char *m_pstrFilename;               // includes directory path and file extension
+		Format m_fileformat;
+		Image::Format m_imageformat;
+		FileHeader *m_pheader;
+		unsigned char *m_paucEncodingBits;
+		unsigned int m_uiEncodingBitsBytes;
+		unsigned int m_uiSourceWidth;
+		unsigned int m_uiSourceHeight;
+		unsigned int m_uiExtendedWidth;
+		unsigned int m_uiExtendedHeight;
+	};
+
+}
diff --git a/EtcTool/EtcFileHeader.cpp b/EtcTool/EtcFileHeader.cpp
new file mode 100644
index 0000000..19f0662
--- /dev/null
+++ b/EtcTool/EtcFileHeader.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EtcFileHeader.h"
+
+#include "EtcBlock4x4EncodingBits.h"
+
+#include <assert.h>
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// Fill in the PKM header for a_pfile: "PKM " magic, version "10", data type 0 and the image sizes.
+	FileHeader_Pkm::FileHeader_Pkm(File *a_pfile)
+	{
+		m_pfile = a_pfile;
+
+		// magic number "PKM " and version "10", stored one character at a time
+		m_data.m_acMagicNumber[0] = 'P';
+		m_data.m_acMagicNumber[1] = 'K';
+		m_data.m_acMagicNumber[2] = 'M';
+		m_data.m_acMagicNumber[3] = ' ';
+		m_data.m_acVersion[0] = '1';
+		m_data.m_acVersion[1] = '0';
+
+		// data type 0 is ETC1_RGB_NO_MIPMAPS
+		m_data.m_ucDataType_msb = 0;
+		m_data.m_ucDataType_lsb = 0;
+
+		// every dimension is split into a high byte (bits 8..15) and a low byte (bits 0..7)
+		const unsigned int uiSourceWidth = m_pfile->GetSourceWidth();
+		const unsigned int uiSourceHeight = m_pfile->GetSourceHeight();
+		m_data.m_ucOriginalWidth_msb = (unsigned char)(uiSourceWidth >> 8);
+		m_data.m_ucOriginalWidth_lsb = uiSourceWidth & 0xFF;
+		m_data.m_ucOriginalHeight_msb = (unsigned char)(uiSourceHeight >> 8);
+		m_data.m_ucOriginalHeight_lsb = uiSourceHeight & 0xFF;
+
+		const unsigned int uiExtendedWidth = m_pfile->GetExtendedWidth();
+		const unsigned int uiExtendedHeight = m_pfile->GetExtendedHeight();
+		m_data.m_ucExtendedWidth_msb = (unsigned char)(uiExtendedWidth >> 8);
+		m_data.m_ucExtendedWidth_lsb = uiExtendedWidth & 0xFF;
+		m_data.m_ucExtendedHeight_msb = (unsigned char)(uiExtendedHeight >> 8);
+		m_data.m_ucExtendedHeight_lsb = uiExtendedHeight & 0xFF;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Emit the raw PKM header struct to a_pfile as a single record (the fwrite result is not checked).
+	void FileHeader_Pkm::Write(FILE *a_pfile)
+	{
+		const size_t szHeaderBytes = sizeof(Data);
+		const void *pvHeader = &m_data;
+		fwrite(pvHeader, szHeaderBytes, 1, a_pfile);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Prepare a KTX header for a_pfile: the 12-byte identifier, the endianness marker, the GL format pair
+	// matching the file's image format, and one face / one mip level at the file's source dimensions.
+	FileHeader_Ktx::FileHeader_Ktx(File *a_pfile)
+	{
+		m_pfile = a_pfile;
+
+		// file identifier: 0xAB 'K' 'T' 'X' ' ' '1' '1' 0xBB '\r' '\n' 0x1A '\n'
+		static const uint8_t s_au8Identifier[12] =
+		{
+			0xAB, 0x4B, 0x54, 0x58,
+			0x20, 0x31, 0x31, 0xBB,
+			0x0D, 0x0A, 0x1A, 0x0A
+		};
+		for (size_t szByte = 0; szByte < sizeof(s_au8Identifier); ++szByte)
+		{
+			m_data.m_au8Identifier[szByte] = s_au8Identifier[szByte];
+		}
+
+		// NOTE(review): glType 0, glTypeSize 1 and glFormat 0 look like the values the KTX
+		// specification prescribes for compressed textures — confirm against the spec.
+		m_data.m_u32Endianness = 0x04030201;
+		m_data.m_u32GlType = 0;
+		m_data.m_u32GlTypeSize = 1;
+		m_data.m_u32GlFormat = 0;
+
+		// pick the format pair first, then store it once below
+		InternalFormat internalformat;
+		BaseInternalFormat baseinternalformat;
+		switch (m_pfile->GetImageFormat())
+		{
+		case Image::Format::RGB8:
+		case Image::Format::SRGB8:
+			internalformat = InternalFormat::ETC2_RGB8;
+			baseinternalformat = BaseInternalFormat::ETC2_RGB8;
+			break;
+
+		case Image::Format::RGBA8:
+		case Image::Format::SRGBA8:
+			internalformat = InternalFormat::ETC2_RGBA8;
+			baseinternalformat = BaseInternalFormat::ETC2_RGBA8;
+			break;
+
+		case Image::Format::RGB8A1:
+		case Image::Format::SRGB8A1:
+			internalformat = InternalFormat::ETC2_RGB8A1;
+			baseinternalformat = BaseInternalFormat::ETC2_RGB8A1;
+			break;
+
+		case Image::Format::R11:
+			internalformat = InternalFormat::ETC2_R11;
+			baseinternalformat = BaseInternalFormat::ETC2_R11;
+			break;
+
+		case Image::Format::SIGNED_R11:
+			internalformat = InternalFormat::ETC2_SIGNED_R11;
+			baseinternalformat = BaseInternalFormat::ETC2_R11;
+			break;
+
+		case Image::Format::RG11:
+			internalformat = InternalFormat::ETC2_RG11;
+			baseinternalformat = BaseInternalFormat::ETC2_RG11;
+			break;
+
+		case Image::Format::SIGNED_RG11:
+			internalformat = InternalFormat::ETC2_SIGNED_RG11;
+			baseinternalformat = BaseInternalFormat::ETC2_RG11;
+			break;
+
+		default:
+			internalformat = InternalFormat::ETC1_RGB8;
+			baseinternalformat = BaseInternalFormat::ETC1_RGB8;
+			break;
+		}
+		m_data.m_u32GlInternalFormat = (unsigned int)internalformat;
+		m_data.m_u32GlBaseInternalFormat = (unsigned int)baseinternalformat;
+
+		// a single 2D image: no depth, no array elements, one face, one mip level, no key/value data
+		m_data.m_u32PixelWidth = m_pfile->GetSourceWidth();
+		m_data.m_u32PixelHeight = m_pfile->GetSourceHeight();
+		m_data.m_u32PixelDepth = 0;
+		m_data.m_u32NumberOfArrayElements = 0;
+		m_data.m_u32NumberOfFaces = 1;
+		m_data.m_u32NumberOfMipmapLevels = 1;
+		m_data.m_u32BytesOfKeyValueData = 0;
+
+		m_pkeyvaluepair = nullptr;
+		m_u32Images = 0;
+		m_u32KeyValuePairs = 0;
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Write the fixed header, then the key/value data if any is registered, then the 32-bit image size.
+	void FileHeader_Ktx::Write(FILE *a_pfile)
+	{
+		// fixed-size header record
+		const size_t szHeaderBytes = fwrite(&m_data, 1, sizeof(Data), a_pfile);
+		assert(szHeaderBytes == sizeof(Data));
+		(void)szHeaderBytes;	// only consumed by assert()
+
+		// optional key/value data, written as one record of its self-declared size
+		if (m_u32KeyValuePairs != 0)
+		{
+			const size_t szPairBytes = m_pkeyvaluepair->u32KeyAndValueByteSize;
+			fwrite(m_pkeyvaluepair, szPairBytes, 1, a_pfile);
+		}
+
+		// byte count of the encoding bits, as reported by the owning File
+		const uint32_t u32ImageSize = m_pfile->GetEncodingBitsBytes();
+		const size_t szSizeBytes = fwrite(&u32ImageSize, 1, sizeof(u32ImageSize), a_pfile);
+		assert(szSizeBytes == sizeof(u32ImageSize));
+		(void)szSizeBytes;	// only consumed by assert()
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// Gives callers direct, writable access to the header record held inside this object.
+	FileHeader_Ktx::Data *FileHeader_Ktx::GetData()
+	{
+		return &(this->m_data);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+} // namespace Etc
diff --git a/EtcTool/EtcFileHeader.h b/EtcTool/EtcFileHeader.h
new file mode 100644
index 0000000..4356afe
--- /dev/null
+++ b/EtcTool/EtcFileHeader.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcFile.h"
+#include <stdio.h>
+#include <inttypes.h>
+
+namespace Etc
+{
+
+	class Image;
+
+	class FileHeader
+	{
+	public:
+		// Abstract base of the per-format headers; Write() emits the header bytes to a_pfile.
+		virtual void Write(FILE *a_pfile) = 0;
+		File GetFile();	// NOTE(review): returns File by value and EtcFileHeader.cpp has no definition — confirm it is used
+		virtual ~FileHeader(void) {}
+	protected:
+		// The File this header describes; the destructors in this header never delete it.
+		File *m_pfile;
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	// PKM header: a record made only of single bytes, which Write() sends to disk verbatim.
+    class FileHeader_Pkm : public FileHeader
+    {
+    public:
+
+		FileHeader_Pkm(File *a_pfile);
+
+		virtual void Write(FILE *a_pfile);
+		virtual ~FileHeader_Pkm(void) {}
+	private:
+		// On-disk layout. Each 16-bit value is split into an msb byte followed by an lsb byte.
+		typedef struct
+		{
+			char m_acMagicNumber[4];
+			char m_acVersion[2];
+			unsigned char m_ucDataType_msb;             // e.g. ETC1_RGB_NO_MIPMAPS
+			unsigned char m_ucDataType_lsb;
+			unsigned char m_ucExtendedWidth_msb;     //  padded to 4x4 blocks
+			unsigned char m_ucExtendedWidth_lsb;
+			unsigned char m_ucExtendedHeight_msb;    //  padded to 4x4 blocks
+			unsigned char m_ucExtendedHeight_lsb;
+			unsigned char m_ucOriginalWidth_msb;
+			unsigned char m_ucOriginalWidth_lsb;
+			unsigned char m_ucOriginalHeight_msb;
+			unsigned char m_ucOriginalHeight_lsb;
+		} Data;
+
+		Data m_data;
+	};
+
+	// ----------------------------------------------------------------------------------------------------
+	// KTX header: the fixed Data record, optional key/value data, then a 32-bit image size (see Write()).
+    class FileHeader_Ktx : public FileHeader
+    {
+    public:
+		// Key/value record; only its leading byte-size field is modelled here.
+		typedef struct
+		{
+			uint32_t	u32KeyAndValueByteSize;
+		} KeyValuePair;
+		// Fixed-size header record; it is read from and written to files as raw bytes.
+		typedef struct
+		{
+			uint8_t m_au8Identifier[12];
+			uint32_t m_u32Endianness;
+			uint32_t m_u32GlType;
+			uint32_t m_u32GlTypeSize;
+			uint32_t m_u32GlFormat;
+			uint32_t m_u32GlInternalFormat;
+			uint32_t m_u32GlBaseInternalFormat;
+			uint32_t m_u32PixelWidth;
+			uint32_t m_u32PixelHeight;
+			uint32_t m_u32PixelDepth;
+			uint32_t m_u32NumberOfArrayElements;
+			uint32_t m_u32NumberOfFaces;
+			uint32_t m_u32NumberOfMipmapLevels;
+			uint32_t m_u32BytesOfKeyValueData;
+		} Data;
+		// Values stored in Data::m_u32GlInternalFormat.
+		enum class InternalFormat
+		{
+			ETC1_RGB8 = 0x8D64,
+			ETC1_ALPHA8 = ETC1_RGB8,
+			//
+			ETC2_R11 = 0x9270,
+			ETC2_SIGNED_R11 = 0x9271,
+			ETC2_RG11 = 0x9272,
+			ETC2_SIGNED_RG11 = 0x9273,
+			ETC2_RGB8 = 0x9274,
+			ETC2_SRGB8 = 0x9275,
+			ETC2_RGB8A1 = 0x9276,
+			ETC2_SRGB8_PUNCHTHROUGH_ALPHA1 = 0x9277,
+			ETC2_RGBA8 = 0x9278
+		};
+		// Values stored in Data::m_u32GlBaseInternalFormat; several enumerators share one value.
+		enum class BaseInternalFormat
+		{
+			ETC2_R11 = 0x1903,
+			ETC2_RG11 = 0x8227,
+			ETC1_RGB8 = 0x1907,
+			ETC1_ALPHA8 = ETC1_RGB8,
+			//
+			ETC2_RGB8 = 0x1907,
+			ETC2_RGB8A1 = 0x1908,
+			ETC2_RGBA8 = 0x1908,
+		};
+
+		FileHeader_Ktx(File *a_pfile);
+
+		virtual void Write(FILE *a_pfile);
+		virtual ~FileHeader_Ktx(void) {}
+		// NOTE(review): AddKeyAndValue() is declared but EtcFileHeader.cpp has no definition — confirm.
+		void AddKeyAndValue(KeyValuePair *a_pkeyvaluepair);
+
+		Data* GetData();
+
+	private:
+		// m_pkeyvaluepair starts out null and is only dereferenced by Write() when m_u32KeyValuePairs != 0.
+		Data m_data;
+		KeyValuePair *m_pkeyvaluepair;
+		// Both counters are zeroed by the constructor.
+		uint32_t m_u32Images;
+		uint32_t m_u32KeyValuePairs;
+	};
+
+} // namespace Etc
diff --git a/EtcTool/EtcMemTest.cpp b/EtcTool/EtcMemTest.cpp
new file mode 100644
index 0000000..855f1be
--- /dev/null
+++ b/EtcTool/EtcMemTest.cpp
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "EtcMemTest.h"
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <ctime>
+using namespace std;
+
+#if defined(_WIN32)
+#include <Windows.h>
+#include <psapi.h>
+// Use to convert bytes to KB
+#define DIV 1024
+
+// Specify the width of the field in which to print the numbers. 
+// The asterisk in the format specifier "%*I64d" takes an integer 
+// argument and uses it to pad and right justify the number.
+#define WIDTH 7
+
+size_t GetMemoryUsageAmount()
+{
+	// Returns the calling process's PrivateUsage counter as reported by GetProcessMemoryInfo
+	// (a byte count per the PROCESS_MEMORY_COUNTERS_EX documentation), or 0 when the query fails.
+	// This is a per-process figure, unlike the system-wide ullAvailPhys value that
+	// GlobalMemoryStatusEx reports.
+
+	PROCESS_MEMORY_COUNTERS_EX pmc = {};
+	pmc.cb = sizeof(pmc);
+	if (!GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc)))
+	{
+		// GetLastError() returns a DWORD (unsigned long), so %lu is the matching conversion.
+		printf("GetProcessMemoryInfo failed with code %lu\n", GetLastError());
+		return 0;
+	}
+
+	return pmc.PrivateUsage;
+}
+
+void PrintWindowsMemUsage()
+{
+	// Prints the system-wide figures reported by GlobalMemoryStatusEx: the memory load as a
+	// percentage, then each byte counter divided by DIV, all right-justified in WIDTH columns.
+	MEMORYSTATUSEX statex;
+	statex.dwLength = sizeof(statex);
+	GlobalMemoryStatusEx(&statex);
+
+	printf("There is  %*ld percent of memory in use.\n",
+		WIDTH, statex.dwMemoryLoad);
+
+	const struct { const char *pstrFormat; DWORDLONG ullBytes; } acounters[] =
+	{
+		{ "There are %*I64d total KB of physical memory.\n", statex.ullTotalPhys },
+		{ "There are %*I64d free  KB of physical memory.\n", statex.ullAvailPhys },
+		{ "There are %*I64d total KB of paging file.\n", statex.ullTotalPageFile },
+		{ "There are %*I64d free  KB of paging file.\n", statex.ullAvailPageFile },
+		{ "There are %*I64d total KB of virtual memory.\n", statex.ullTotalVirtual },
+		{ "There are %*I64d free  KB of virtual memory.\n", statex.ullAvailVirtual },
+		// Show the amount of extended memory available.
+		{ "There are %*I64d free  KB of extended memory.\n", statex.ullAvailExtendedVirtual },
+	};
+
+	for (size_t i = 0; i < sizeof(acounters) / sizeof(acounters[0]); i++)
+	{
+		printf(acounters[i].pstrFormat, WIDTH, acounters[i].ullBytes / DIV);
+	}
+}
+
+
+
+
+int RunMemTest(bool verboseOutput, size_t numTestIterations)
+{
+	// Repeatedly encodes PNGs from a hard-coded directory with random settings and prints the change in
+	// process memory between iterations. Returns -1 when the directory is missing or holds no PNGs, else 0.
+	WIN32_FIND_DATA ffd;
+	srand(static_cast<unsigned int>(time(NULL)));
+	HANDLE hFind = INVALID_HANDLE_VALUE;
+
+	vector<string> allImages;
+
+	string imagesDir = "D:\\source\\etc2Sourcetree\\TestImages\\";
+	string outputDir = "C:\\Users\\BSI\\Desktop\\etc2comp\\";
+
+	hFind = FindFirstFile((imagesDir+"*").c_str(), &ffd);
+
+	if (INVALID_HANDLE_VALUE == hFind)
+	{
+		printf ("image dir doesnt exist: %s\n", imagesDir.c_str());
+		return -1;
+	}
+
+	do
+	{
+		if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+			continue;
+
+		string filename = ffd.cFileName;
+		size_t found = filename.find_last_of(".");
+		string ext = filename.substr(found + 1);
+		if(ext != "png")
+			continue;
+
+		allImages.push_back(filename);
+	} while (FindNextFile(hFind, &ffd) != 0);
+	FindClose(hFind);
+
+	// The loop below indexes allImages and takes rand() % allImages.size(), so it must not be empty.
+	if (allImages.empty())
+	{
+		printf("no png images found in: %s\n", imagesDir.c_str());
+		return -1;
+	}
+
+	size_t oldMemSize = 0;
+	size_t curMemSize = 0;
+	size_t fileIndex = 0;
+	int encodingType = 2;//0 = c , 1= both c and c++ encoding, 2 = c++ encoding
+	bool encodeRandomImage = true;
+	for (size_t i = 0; i < numTestIterations; i++)
+	{
+		encodingType = rand()%3;
+
+		printf("--------------iteration %zu-----------\n",i+1);
+		int randomNumber = 1 + rand() % 11;
+		Image::Format format = Image::Format::RGBA8;
+
+		randomNumber = rand() % 4;
+		ErrorMetric e_ErrMetric = (ErrorMetric)randomNumber;
+
+		randomNumber = rand() % 100;
+		float curEffort = (float)randomNumber;
+
+		int jobs = 1 + rand() % 8;
+		long memDiff = 0;
+
+		if (verboseOutput)
+		{
+			printf("    effort = %.f\n", curEffort);
+			printf("  encoding =  %s\n", Image::EncodingFormatToString(format));
+			printf("  error metric: %s\n", ErrorMetricToString(e_ErrMetric));
+			printf("jobs: %d\n", jobs);
+		}
+
+		oldMemSize = curMemSize;
+		curMemSize = GetMemoryUsageAmount();
+		memDiff = (long)curMemSize - (long)oldMemSize;
+		printf("mem diff: %ldkb\n", memDiff);
+
+		string curFile = imagesDir + allImages[fileIndex];
+		string outFile = outputDir + allImages[fileIndex] +".ktx";
+		printf("file: %s\n", curFile.c_str());
+
+		if (!encodeRandomImage)
+		{
+			if (fileIndex >= allImages.size() - 1)
+				fileIndex = 0;
+			else
+				fileIndex++;
+		}
+		else
+		{
+			fileIndex = rand()%allImages.size();
+		}
+		SourceImage sourceimage(curFile.c_str(), -1, -1);
+
+		unsigned int uiSourceWidth = sourceimage.GetWidth();
+		unsigned int uiSourceHeight = sourceimage.GetHeight();
+
+		unsigned char *paucEncodingBits;
+		unsigned int uiEncodingBitsBytes;
+		unsigned int uiExtendedWidth;
+		unsigned int uiExtendedHeight;
+		int iEncodingTime_ms;
+		//////////////C INTERFACE FIRST//////////////////////
+		if (encodingType < 2)
+		{
+			if (verboseOutput)
+			{
+				printf("start C interface Encoding:\n");
+			}
+
+			Etc::Encode((float *)sourceimage.GetPixels(),
+				uiSourceWidth, uiSourceHeight,
+				format,
+				e_ErrMetric,
+				curEffort,
+				jobs,
+				MAX_JOBS,
+				&paucEncodingBits, &uiEncodingBitsBytes,
+				&uiExtendedWidth, &uiExtendedHeight,
+				&iEncodingTime_ms, verboseOutput);
+			if (verboseOutput)
+			{
+				printf("    encode time = %dms\n", iEncodingTime_ms);
+				printf("EncodedImage: %s\n", outFile.c_str());
+			}
+			Etc::File C_interfaceEtcfile(outFile.c_str(), Etc::File::Format::INFER_FROM_FILE_EXTENSION,
+				format,
+				paucEncodingBits, uiEncodingBitsBytes,
+				uiSourceWidth, uiSourceHeight,
+				uiExtendedWidth, uiExtendedHeight);
+			C_interfaceEtcfile.Write();
+
+			oldMemSize = curMemSize;
+			curMemSize = GetMemoryUsageAmount();
+			memDiff = (long)curMemSize - (long)oldMemSize;
+			printf("mem diff again: %ldkb\n", memDiff);
+		}
+		//////////////C++ INTERFACE ///////////////////////////
+		if (encodingType > 0)
+		{
+			if (verboseOutput)
+			{
+				printf("start C++ Encoding:\n");
+			}
+			Etc::Image image((float *)sourceimage.GetPixels(),
+				uiSourceWidth, uiSourceHeight,
+				e_ErrMetric);
+			image.m_bVerboseOutput = verboseOutput;
+
+			Etc::Image::EncodingStatus encStatus = image.Encode(format, e_ErrMetric, curEffort, jobs, MAX_JOBS);
+			if (verboseOutput)
+			{
+				printf("  encode time = %dms\n", image.GetEncodingTimeMs());
+				printf("EncodedImage: %s\n", outFile.c_str());
+				printf("status bitfield: %u\n", (unsigned int)encStatus);
+			}
+			Etc::File etcfile(outFile.c_str(), Etc::File::Format::INFER_FROM_FILE_EXTENSION,
+				format,
+				image.GetEncodingBits(), image.GetEncodingBitsBytes(),
+				image.GetSourceWidth(), image.GetSourceHeight(),
+				image.GetExtendedWidth(), image.GetExtendedHeight());
+			// NOTE(review): ~File() delete[]s the bits it was given; confirm Image does not free the same buffer.
+			etcfile.Write();
+		}
+		
+	}
+
+	return 0;
+}
+
+#endif
diff --git a/EtcTool/EtcMemTest.h b/EtcTool/EtcMemTest.h
new file mode 100644
index 0000000..f225159
--- /dev/null
+++ b/EtcTool/EtcMemTest.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcConfig.h"
+
+#include "Etc.h"
+
+#include "EtcSourceImage.h"
+#include "EtcFile.h"
+#include "EtcMath.h"
+#include "EtcImage.h"
+#include "EtcErrorMetric.h"
+#include "EtcBlock4x4EncodingBits.h"
+#include "EtcFileHeader.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_JOBS 1024
+using namespace Etc;
+
+int RunMemTest(bool verboseOutput, size_t numTestIterations);
+
+
+
+
+
+/*int getMem()
+{
+
+int tSize = 0, resident = 0, share = 0;
+ifstream buffer("/proc/self/statm");
+buffer >> tSize >> resident >> share;
+buffer.close();
+
+long page_size_kb = sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages
+double rss = resident * page_size_kb;
+cout << "RSS - " << rss << " kB\n";
+
+double shared_mem = share * page_size_kb;
+cout << "Shared Memory - " << shared_mem << " kB\n";
+
+cout << "Private Memory - " << rss - shared_mem << "kB\n";
+return 0;
+}*/
diff --git a/EtcTool/EtcSourceImage.cpp b/EtcTool/EtcSourceImage.cpp
new file mode 100644
index 0000000..0c64fea
--- /dev/null
+++ b/EtcTool/EtcSourceImage.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+#define _CRT_SECURE_NO_WARNINGS (1)
+#endif
+
+#include "EtcConfig.h"
+#include "EtcSourceImage.h"
+#include "Etc.h"
+
+#if USE_STB_IMAGE_LOAD
+#include "stb_image.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "lodepng.h"
+
+namespace Etc
+{
+
+	// ----------------------------------------------------------------------------------------------------
+	// loads a_pstrFilename; when both a_iPixelX and a_iPixelY are >= 0 only the 4x4 block holding that pixel is kept
+	SourceImage::SourceImage(const char *a_pstrFilename, int a_iPixelX, int a_iPixelY)
+		: m_pstrFilename(nullptr),
+		m_pstrName(nullptr),
+		m_pstrFileExtension(nullptr),
+		m_uiWidth(0),
+		m_uiHeight(0),
+		m_pafrgbaPixels(nullptr)
+	{
+		// record the file name pieces first because Read() opens m_pstrFilename
+		SetName(a_pstrFilename);
+		// decode the file into m_pafrgbaPixels and set the image dimensions
+		Read(a_iPixelX, a_iPixelY);
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// wraps an existing pixel array; the pointer is stored as-is and later released with delete[] by the destructor
+	SourceImage::SourceImage(ColorFloatRGBA *a_pafrgbaSource,
+								unsigned int a_uiSourceWidth,
+								unsigned int a_uiSourceHeight)
+		: m_pstrFilename(nullptr),
+		m_pstrName(nullptr),
+		m_pstrFileExtension(nullptr),
+		m_uiWidth(a_uiSourceWidth),
+		m_uiHeight(a_uiSourceHeight),
+		m_pafrgbaPixels(a_pafrgbaSource)
+	{
+		// no file is associated with this image, so all name pointers stay null
+	}
+	// ----------------------------------------------------------------------------------------------------
+	// releases the name strings and the pixel array, then clears every member
+	SourceImage::~SourceImage()
+	{
+		// delete[] on a null pointer is a no-op, so no explicit null checks are needed
+
+		// full path as stored by SetName()
+		delete[] m_pstrFilename;
+		m_pstrFilename = nullptr;
+
+		// m_pstrFileExtension points into the m_pstrName buffer,
+		// so it is only cleared and never deleted on its own
+		delete[] m_pstrName;
+		m_pstrName = nullptr;
+		m_pstrFileExtension = nullptr;
+
+		// pixel storage: allocated by Read() or handed in through the
+		// ColorFloatRGBA constructor; either way it is freed here
+		delete[] m_pafrgbaPixels;
+		m_pafrgbaPixels = nullptr;
+
+		// leave the object describing an empty image
+		m_uiWidth = 0;
+		m_uiHeight = 0;
+	}
+	// ----------------------------------------------------------------------------------------------------
+	// loads m_pstrFilename and converts it to ColorFloatRGBA in m_pafrgbaPixels; exits the process on a decode error
+	void SourceImage::Read(int a_iPixelX, int a_iPixelY)
+	{
+		unsigned char* paucPixels = nullptr;
+
+		int iWidth;
+		int iHeight;
+		int iBitsPerPixel;
+		bool bool16BitImage = false;
+	
+#if USE_STB_IMAGE_LOAD
+		//if stb_image is available, only use it to load files other than png
+		char *fileExt = strrchr(m_pstrFilename, '.');
+
+		// a name without any '.' has no extension to compare, so it is treated as non-png
+		if (fileExt == nullptr || strcmp(fileExt, ".png") != 0)
+		{
+			paucPixels = stbi_load(m_pstrFilename, &iWidth, &iHeight, &iBitsPerPixel, 4);
+			if (paucPixels == nullptr)
+			{
+				printf("stb_image error %s\n", stbi_failure_reason());
+				assert(0);
+				exit(1);
+			}
+		}
+#endif
+
+		if (paucPixels == nullptr)
+		{
+			//we can load 8 or 16 bit pngs
+			int iBitDepth = 16;
+			unsigned int error = lodepng_decode_file(&paucPixels,
+				(unsigned int*)&iWidth, (unsigned int*)&iHeight,
+				m_pstrFilename,
+				LCT_RGBA, iBitDepth);
+
+			bool16BitImage = (iBitDepth == 16) ? true : false;
+			if (error)
+			{
+				printf("lodePNG error %u: %s\n", error, lodepng_error_text(error));
+				assert(0);
+				exit(1);
+			}
+		}
+
+		//the pixel coords for the top left corner of the block
+		int iBlockX = 0;
+		int iBlockY = 0;
+		if (a_iPixelX > -1 && a_iPixelY > -1)
+		{
+			// in 1 block mode, we basically will have an img that's 4x4
+			m_uiWidth = 4;
+			m_uiHeight = 4;
+
+			// keep the requested pixel inside the image (the last valid index is size - 1)
+			if (a_iPixelX >= iWidth)
+				a_iPixelX = iWidth - 1;
+			if (a_iPixelY >= iHeight)
+				a_iPixelY = iHeight - 1;
+			// remove the bottom 2 bits to get the block coordinates
+			iBlockX = (a_iPixelX & 0xFFFFFFFC);
+			iBlockY = (a_iPixelY & 0xFFFFFFFC);
+		}
+		else
+		{
+			m_uiWidth = iWidth;
+			m_uiHeight = iHeight;
+		}
+
+		m_pafrgbaPixels = new ColorFloatRGBA[m_uiWidth * m_uiHeight];
+		assert(m_pafrgbaPixels);
+
+		int iBytesPerPixel = bool16BitImage ? 8 : 4;
+		const unsigned char *pucPixel;
+		ColorFloatRGBA *pfrgbaPixel = m_pafrgbaPixels;
+
+		// convert pixels from RGBA* to ColorFloatRGBA
+		for (unsigned int uiV = iBlockY; uiV < (iBlockY+m_uiHeight); ++uiV)
+		{
+			// a block on the bottom/right edge may extend past the image: repeat the last row/column
+			unsigned int uiSrcV = (uiV < (unsigned int)iHeight) ? uiV : (unsigned int)iHeight - 1;
+
+			// read each row
+			for (unsigned int uiH = iBlockX; uiH < (iBlockX+m_uiWidth); ++uiH)
+			{
+				unsigned int uiSrcH = (uiH < (unsigned int)iWidth) ? uiH : (unsigned int)iWidth - 1;
+				pucPixel = &paucPixels[((size_t)uiSrcV * iWidth + uiSrcH) * iBytesPerPixel];
+				if (bool16BitImage)
+				{
+						unsigned short ushR = (pucPixel[0]<<8) + pucPixel[1];
+						unsigned short ushG = (pucPixel[2]<<8) + pucPixel[3];
+						unsigned short ushB = (pucPixel[4]<<8) + pucPixel[5];
+						unsigned short ushA = (pucPixel[6]<<8) + pucPixel[7];
+
+						*pfrgbaPixel++ = ColorFloatRGBA((float)ushR / 65535.0f,
+														(float)ushG / 65535.0f,
+														(float)ushB / 65535.0f,
+														(float)ushA / 65535.0f);
+				}
+				else
+				{
+						*pfrgbaPixel++ = ColorFloatRGBA::ConvertFromRGBA8(pucPixel[0], pucPixel[1],
+																			pucPixel[2], pucPixel[3]);
+				}
+			}
+		}
+
+#if USE_STB_IMAGE_LOAD
+		stbi_image_free(paucPixels);
+#else
+		free(paucPixels);
+#endif
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// sets m_pstrFilename, m_pstrName and m_pstrFileExtension
+	// a null a_pstrFilename leaves the current names untouched; otherwise previously stored names are released
+	void SourceImage::SetName(const char *a_pstrFilename)
+	{
+		if (a_pstrFilename == nullptr)
+		{
+			return;
+		}
+		// copy before releasing the old buffers so that SetName(GetFilename()) stays valid
+		char *pstrFilename = new char[strlen(a_pstrFilename) + 1];
+		strcpy(pstrFilename, a_pstrFilename);
+		delete[] m_pstrFilename;
+		delete[] m_pstrName;
+		m_pstrFilename = pstrFilename;
+		m_pstrName = new char[strlen(m_pstrFilename) + 1];
+		m_pstrFileExtension = nullptr;
+
+		// ignore directory path: the name starts after the last '/' or '\\', whichever comes later
+		char *pcLastSlash = strrchr(m_pstrFilename, '/');
+		char *pcLastBackSlash = strrchr(m_pstrFilename, '\\');
+		char *pcNameStart = m_pstrFilename;
+		if (pcLastSlash != nullptr && (pcLastBackSlash == nullptr || pcLastSlash > pcLastBackSlash))
+		{
+			pcNameStart = pcLastSlash + 1;
+		}
+		else if (pcLastBackSlash != nullptr)
+		{
+			pcNameStart = pcLastBackSlash + 1;
+		}
+		strcpy(m_pstrName, pcNameStart);
+
+		// find file extension and remove it from image name
+		char *pcLastPeriod = strrchr(m_pstrName, '.');
+		if (pcLastPeriod != nullptr)
+		{
+			m_pstrFileExtension = pcLastPeriod + 1;
+			*pcLastPeriod = 0;
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	// treats each pixel's RGB as a vector packed into [0,1], rescales it to unit length and packs it back
+	void SourceImage::NormalizeXYZ(void)
+	{
+		const int iPixelCount = m_uiWidth * m_uiHeight;
+
+		for (int iIndex = 0; iIndex < iPixelCount; ++iIndex)
+		{
+			ColorFloatRGBA &frgba = m_pafrgbaPixels[iIndex];
+
+			// unpack the [0,1] color channels into [-1,1] vector components
+			const float fVecX = 2.0f*frgba.fR - 1.0f;
+			const float fVecY = 2.0f*frgba.fG - 1.0f;
+			const float fVecZ = 2.0f*frgba.fB - 1.0f;
+
+			const float fSquaredLength = fVecX*fVecX + fVecY*fVecY + fVecZ*fVecZ;
+
+			if (fSquaredLength == 0.0f)
+			{
+				// a zero vector has no direction; store the fixed color (1, 0, 0) instead
+				frgba.fR = 1.0f;
+				frgba.fG = 0.0f;
+				frgba.fB = 0.0f;
+				continue;
+			}
+
+			// scale the vector to unit length
+			const float fVecLength = sqrtf(fSquaredLength);
+			const float fUnitX = fVecX / fVecLength;
+			const float fUnitY = fVecY / fVecLength;
+			const float fUnitZ = fVecZ / fVecLength;
+
+			// pack [-1,1] back into [0,1]; the alpha channel is not touched
+			frgba.fR = 0.5f * (fUnitX + 1.0f);
+			frgba.fG = 0.5f * (fUnitY + 1.0f);
+			frgba.fB = 0.5f * (fUnitZ + 1.0f);
+		}
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
+
+}	// namespace Etc
diff --git a/EtcTool/EtcSourceImage.h b/EtcTool/EtcSourceImage.h
new file mode 100644
index 0000000..0e57e6e
--- /dev/null
+++ b/EtcTool/EtcSourceImage.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcColorFloatRGBA.h"
+
+namespace Etc
+{
+	// source image held as an array of ColorFloatRGBA; owns its name strings and pixels, so it is not copyable
+	class SourceImage
+	{
+	public:
+		// loads a_pstrFilename; when both pixel coordinates are >= 0 only the 4x4 block holding that pixel is kept
+		SourceImage(const char *a_pstrFilename, int a_iPixelX = -1, int a_iPixelY = -1);
+		// stores a_pafrgbaSource without copying it; the destructor releases it with delete[]
+		SourceImage(ColorFloatRGBA *a_pafrgbaSource,
+					unsigned int a_uiSourceWidth,
+					unsigned int a_uiSourceHeight);
+
+		~SourceImage();
+		// copying would make two objects delete[] the same buffers, so it is disabled
+		SourceImage(const SourceImage &) = delete;
+		SourceImage &operator=(const SourceImage &) = delete;
+
+		void SetName(const char *a_pstrFilename);
+		void NormalizeXYZ(void);
+
+		inline const char *GetFilename(void)
+		{
+			return m_pstrFilename;
+		}
+
+		inline const char *GetName(void)
+		{
+			return m_pstrName;
+		}
+
+		inline const char *GetFileExtension(void)
+		{
+			return m_pstrFileExtension;
+		}
+
+		inline unsigned int GetWidth(void)
+		{
+			return m_uiWidth;
+		}
+
+		inline unsigned int GetHeight(void)
+		{
+			return m_uiHeight;
+		}
+
+		inline ColorFloatRGBA * GetPixels(void)
+		{
+			return m_pafrgbaPixels;
+		}
+
+		inline ColorFloatRGBA * GetPixel(unsigned int a_uiColumn, unsigned int a_uiRow)
+		{
+			if (m_pafrgbaPixels == nullptr)
+			{
+				return nullptr;
+			}
+			// pixels are stored row by row; the coordinates are not range checked
+			return &m_pafrgbaPixels[a_uiRow*m_uiWidth + a_uiColumn];
+		}
+
+	private:
+		void Read(int a_iPixelX = -1, int a_iPixelY = -1);
+
+		char *m_pstrFilename;				// includes directory path and file extension
+		char *m_pstrName;					// file name with directory path and file extension removed
+		char *m_pstrFileExtension;			// points into the m_pstrName buffer; never deleted separately
+		unsigned int m_uiWidth;				// not necessarily block aligned
+		unsigned int m_uiHeight;			// not necessarily block aligned
+		ColorFloatRGBA *m_pafrgbaPixels;	// m_uiWidth * m_uiHeight pixels, released with delete[]
+	};
+
+}	// namespace Etc
diff --git a/EtcTool/EtcTool.cpp b/EtcTool/EtcTool.cpp
new file mode 100644
index 0000000..bc478fc
--- /dev/null
+++ b/EtcTool/EtcTool.cpp
@@ -0,0 +1,719 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef _WIN32
+#define _CRT_SECURE_NO_WARNINGS (1)
+#endif
+/*
+Since this code will be used on a wide variety of platforms and configurations,
+it's important to have some sort of sanity check for the number of threads that can be used.
+Change this macro to suit your configuration. This will be the maximum number of threads
+that can be created.
+*/
+#define MAX_JOBS 1024
+
+#define RUN_MEM_TEST 0
+
+#include "EtcConfig.h"
+
+#include "Etc.h"
+
+#include "EtcTool.h"
+#include "EtcSourceImage.h"
+#include "EtcFile.h"
+#include "EtcMath.h"
+#include "EtcImage.h"
+#include "EtcErrorMetric.h"
+#include "EtcBlock4x4EncodingBits.h"
+
+#include "EtcAnalysis.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+using namespace Etc;
+
+#if ETC_WINDOWS
+const char *ETC_MKDIR_COMMAND = "mkdir";
+// supplies strcasecmp() for ETC_WINDOWS builds by forwarding both strings to _stricmp()
+int strcasecmp(const char *s1, const char *s2)
+{
+	return _stricmp(s1, s2);
+}
+#else
+const char *ETC_MKDIR_COMMAND = "mkdir -p";
+#endif
+
+#if RUN_MEM_TEST
+#include "EtcMemTest.h"
+#endif
+// holds the option values parsed from the command line; the members are public and read directly by main()
+class Commands
+{
+public:
+	// default job count; -jobs values below it are raised to it
+	static const unsigned int MIN_JOBS = 8;
+
+	Commands(void)
+	{
+		pstrSourceFilename = nullptr;
+		pstrOutputFilename = nullptr;
+		format = Image::Format::DEFAULT;
+		pstrAnalysisDirectory = nullptr;
+		formatType = nullptr;	// only assigned when -format is given, so it needs a defined default
+		uiComparisons = 0;
+		for (unsigned int uiComparison = 0; uiComparison < Analysis::MAX_COMPARISONS; uiComparison++)
+		{
+			apstrCompareFilename[uiComparison] = nullptr;
+		}
+		fEffort = 40.0f;
+		//Rec. 709 or BT.709...the default
+		e_ErrMetric = ErrorMetric::BT709;
+		uiJobs = MIN_JOBS;
+		//these are ignored if they are < 0
+		i_hPixel = -1;
+		i_vPixel = -1;
+		verboseOutput = false;
+		boolNormalizeXYZ = false;
+	}
+
+	bool ProcessCommandLineArguments(int a_iArgs, const char *a_apstrArgs[]);
+	void PrintUsageMessage(void);
+	static void FixSlashes(char *a_pstr);
+
+	char *pstrSourceFilename;	// the one positional argument
+	char *pstrOutputFilename;	// -output
+
+	Image::Format format;		// -format, as an enum
+	char *pstrAnalysisDirectory;	// -analyze
+	char *formatType;			// -format, as the text that was typed
+	unsigned int uiComparisons;	// number of used entries in apstrCompareFilename
+	char *apstrCompareFilename[Analysis::MAX_COMPARISONS];	// -compare, one entry per occurrence
+	float fEffort;				// -effort
+	ErrorMetric e_ErrMetric;	// -errormetric
+	unsigned int uiJobs;		// for threading
+	bool verboseOutput;			// -verbose / -v
+	//when both of these are >= 0 then single block mode is on
+	int i_hPixel;
+	int i_vPixel;
+	bool boolNormalizeXYZ;		// -normalizexyz
+};
+
+#include "EtcFileHeader.h"
+
+// ----------------------------------------------------------------------------------------------------
+// entry point: parses the command line, loads the source image, encodes it and writes the encoded file
+int main(int argc, const char * argv[])
+{
+	// compile-time switch between the C interface (Etc::Encode) and the C++ interface (Etc::Image)
+	static const bool USE_C_INTERFACE = false;
+
+	// this code tests for memory leaks
+#if RUN_MEM_TEST
+	RunMemTest(true, 100);
+	printf("an extra line to see how the memory is free'd\n");
+	printf("all done!\n");
+	exit(0);
+#endif
+
+	Commands commands;
+	bool boolPrintUsage = commands.ProcessCommandLineArguments(argc, argv);
+	if (boolPrintUsage)
+	{
+		commands.PrintUsageMessage();
+		exit(1);
+	}
+
+	if (commands.verboseOutput)
+	{
+		printf("SourceImage: %s\n", commands.pstrSourceFilename);
+	}
+	SourceImage sourceimage(commands.pstrSourceFilename, commands.i_hPixel, commands.i_vPixel);
+	if (commands.boolNormalizeXYZ)
+	{
+		sourceimage.NormalizeXYZ();
+	}
+
+	unsigned int uiSourceWidth = sourceimage.GetWidth();
+	unsigned int uiSourceHeight = sourceimage.GetHeight();
+
+	if (USE_C_INTERFACE)
+	{
+		unsigned char *paucEncodingBits;
+		unsigned int uiEncodingBitsBytes;
+		unsigned int uiExtendedWidth;
+		unsigned int uiExtendedHeight;
+		int iEncodingTime_ms;
+		
+		if (commands.verboseOutput)
+		{
+			printf("Encoding:\n");
+			printf("    effort = %.f\n", commands.fEffort);
+			printf("  encoding =  %s\n", Image::EncodingFormatToString(commands.format));
+			printf("  error metric: %s\n", ErrorMetricToString(commands.e_ErrMetric));
+		}
+		Etc::Encode((float *)sourceimage.GetPixels(),
+					uiSourceWidth, uiSourceHeight,
+					commands.format,
+					commands.e_ErrMetric,
+					commands.fEffort,
+					commands.uiJobs,
+					MAX_JOBS,
+					&paucEncodingBits, &uiEncodingBitsBytes,
+					&uiExtendedWidth, &uiExtendedHeight,
+					&iEncodingTime_ms);
+		if (commands.verboseOutput)
+		{
+			printf("    encode time = %dms\n", iEncodingTime_ms);
+			printf("EncodedImage: %s\n", commands.pstrOutputFilename);
+		}
+		Etc::File etcfile(commands.pstrOutputFilename, Etc::File::Format::INFER_FROM_FILE_EXTENSION,
+							commands.format,
+							paucEncodingBits, uiEncodingBitsBytes,
+							uiSourceWidth, uiSourceHeight,
+							uiExtendedWidth, uiExtendedHeight);
+		etcfile.Write();
+	}
+	else
+	{
+		if (commands.verboseOutput)
+		{
+			printf("Encoding:\n");
+			printf("  effort = %.f%%\n", commands.fEffort);
+			printf("  encoding =  %s\n", Image::EncodingFormatToString(commands.format));
+			printf("  error metric: %s\n", ErrorMetricToString(commands.e_ErrMetric));
+		}
+		Etc::Image image((float *)sourceimage.GetPixels(),
+							uiSourceWidth, uiSourceHeight,
+							commands.e_ErrMetric);
+		image.m_bVerboseOutput = commands.verboseOutput;
+		Etc::Image::EncodingStatus encStatus = Etc::Image::EncodingStatus::SUCCESS;
+		
+		encStatus = image.Encode(commands.format, commands.e_ErrMetric, commands.fEffort, commands.uiJobs,MAX_JOBS);
+		if (commands.verboseOutput)
+		{
+			printf("  encode time = %dms\n", image.GetEncodingTimeMs());
+			printf("EncodedImage: %s\n", commands.pstrOutputFilename);
+			printf("status bitfield: %u\n", static_cast<unsigned int>(encStatus));	// %u needs an unsigned int, not an enum
+		}
+		Etc::File etcfile(commands.pstrOutputFilename, Etc::File::Format::INFER_FROM_FILE_EXTENSION,
+							commands.format,
+							image.GetEncodingBits(), image.GetEncodingBitsBytes(),
+							image.GetSourceWidth(), image.GetSourceHeight(),
+							image.GetExtendedWidth(), image.GetExtendedHeight());
+
+		etcfile.Write();
+
+		// the analysis (and any -compare images) is only produced on the C++ interface path
+		if (commands.pstrAnalysisDirectory)
+		{
+			if (commands.verboseOutput)
+			{
+				printf("Analysis: %s\n", commands.pstrAnalysisDirectory);
+			}
+			Analysis analysis(&image, commands.pstrAnalysisDirectory);
+
+			for (unsigned int uiComparison = 0; uiComparison < commands.uiComparisons; uiComparison++)
+			{
+				analysis.Compare(commands.apstrCompareFilename[uiComparison], commands.i_hPixel, commands.i_vPixel);
+			}
+		}
+
+	}
+
+	return 0;
+}
+
+// ----------------------------------------------------------------------------------------------------
+// return true if usage message should be printed
+// arguments are scanned left to right; -argfile re-enters this function with the tokens read from the file
+bool Commands::ProcessCommandLineArguments(int a_iArgs, const char *a_apstrArgs[])
+{
+	static const bool DEBUG_PRINT = false;
+
+	if (a_iArgs == 1)
+	{
+		printf("Error: missing arguments\n");
+		return true;
+	}
+
+	for (int iArg = 1; iArg < a_iArgs; iArg++)
+    {
+		if (DEBUG_PRINT)
+		{
+			printf("%s: %d %s\n", a_apstrArgs[0], iArg, a_apstrArgs[iArg]);
+		}
+
+		if (strcmp(a_apstrArgs[iArg], "-analyze") == 0)
+		{
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing folder parameter for -analyze\n");
+				return true;
+			}
+			else
+			{
+				pstrAnalysisDirectory = new char[strlen(a_apstrArgs[iArg]) + 1];
+				strcpy(pstrAnalysisDirectory, a_apstrArgs[iArg]);
+				FixSlashes(pstrAnalysisDirectory);
+			}
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-argfile") == 0)
+		{
+			static const unsigned int MAX_LINE_CHARS = 1000;
+			static const unsigned int MAX_ARGFILE_ARGS = 100;
+
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing file parameter for -argfile\n");
+				return true;
+			}
+			else
+			{
+				FILE *pfile = fopen(a_apstrArgs[iArg], "rt");
+				if (pfile == nullptr)
+				{
+					printf("Error: couldn't open argfile (%s)\n", a_apstrArgs[iArg]);
+					return true;
+				}
+
+				char **apstrArgs = new char *[MAX_ARGFILE_ARGS];
+				assert(apstrArgs);
+
+				// add null executable name
+				apstrArgs[0] = const_cast<char *>("");
+				int iArgs = 1;
+
+				// read in tokens
+				{
+					char *pcTokens = new char[MAX_LINE_CHARS + 1];
+					assert(pcTokens);
+					char *pcToken = nullptr;
+
+					// read in each line
+					while (fgets(pcTokens, MAX_LINE_CHARS, pfile))
+					{
+						// skip over lines with '#' in first char
+						if (pcTokens[0] == '#')
+						{
+							continue;
+						}
+
+						// abort remainder of argfile with '%' in first char
+						if (pcTokens[0] == '%')
+						{
+							break;
+						}
+						pcToken = strtok(pcTokens, " \n\r");
+						// tokens beyond MAX_ARGFILE_ARGS are dropped instead of written past the end of apstrArgs
+						if (pcToken != nullptr && iArgs < (int)MAX_ARGFILE_ARGS)
+						{
+							apstrArgs[iArgs] = new char[strlen(pcToken) + 1];
+							strcpy(apstrArgs[iArgs], pcToken);
+							iArgs++;
+						}
+
+						while (pcToken != nullptr)
+						{
+							pcToken = strtok(nullptr, " \n");
+							if (pcToken != nullptr && iArgs < (int)MAX_ARGFILE_ARGS)
+							{
+								apstrArgs[iArgs] = new char[strlen(pcToken) + 1];
+								strcpy(apstrArgs[iArgs], pcToken);
+								iArgs++;
+							}
+						}
+					}
+
+					delete[] pcTokens;
+				}
+
+				fclose(pfile);
+
+				bool boolErrors = ProcessCommandLineArguments(iArgs, const_cast<const char **>(apstrArgs));
+				// free with a separate index: reusing iArg would move the outer loop's position
+				for (int iFreeArg = 1; iFreeArg < iArgs; iFreeArg++)
+				{
+					delete[] apstrArgs[iFreeArg];
+				}
+				delete[] apstrArgs;
+
+				if (boolErrors)
+				{
+					return true;
+				}
+			}
+		}
+		//used for debugging...select a single block to encode
+		//supply the horiz and vert pos of the block
+		else if (strcmp(a_apstrArgs[iArg], "-blockAtHV") == 0)
+		{
+			++iArg;
+
+			//make sure we have two more args after -block
+			if (iArg + 1 >= (a_iArgs))
+			{
+				printf("Error: missing horiz and vert position of pixel for single block mode \n");
+				return true;
+			}
+			i_hPixel = atoi(a_apstrArgs[iArg]);
+			++iArg;
+			i_vPixel = atoi(a_apstrArgs[iArg]);
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-compare") == 0)
+		{
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing comprison_image parameter for -compare\n");
+				return true;
+			}
+			else
+			{
+				if (uiComparisons >= Analysis::MAX_COMPARISONS)
+				{
+					printf("Error: too many comparisons\n");
+					return true;
+				}
+
+				char **ppstrCompareFilename = &apstrCompareFilename[uiComparisons++];
+
+				*ppstrCompareFilename = new char[strlen(a_apstrArgs[iArg]) + 1];
+				strcpy(*ppstrCompareFilename, a_apstrArgs[iArg]);
+				FixSlashes(*ppstrCompareFilename);
+			}
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-effort") == 0)
+		{
+			++iArg;
+
+            if (iArg >= (a_iArgs))
+            {
+				printf("Error: missing amount parameter for -effort\n");
+				return true;
+			}
+            else
+            {
+                float f;
+                int iScans = sscanf(a_apstrArgs[iArg], "%f", &f);
+
+                if (iScans != 1)
+                {
+					printf("Error: couldn't parse amount for -effort (%s)\n", a_apstrArgs[iArg]);
+					return true;
+				}
+                else
+                {
+                    fEffort = f;
+                }
+            }
+        }
+		else if (strcmp(a_apstrArgs[iArg], "-errormetric") == 0)
+		{
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing error metric type for -errormetric\n");
+				return true;
+			}
+			else
+			{
+				if (strcmp(a_apstrArgs[iArg], "rgba") == 0)
+				{
+					e_ErrMetric = ErrorMetric::RGBA;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "rec709") == 0)
+				{
+					e_ErrMetric = ErrorMetric::REC709;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "numeric") == 0)
+				{
+					e_ErrMetric = ErrorMetric::NUMERIC;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "normalxyz") == 0 ||
+					strcmp(a_apstrArgs[iArg], "normalXYZ") == 0)
+				{
+					e_ErrMetric = ErrorMetric::NORMALXYZ;
+				}
+				else
+				{
+					printf("unrecognized error metric (%s), using numeric\n", a_apstrArgs[iArg]);
+					e_ErrMetric = ErrorMetric::NUMERIC;
+				}
+			}
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-format") == 0)
+		{
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing etc_format parameter for -format\n");
+				return true;
+			}
+			else
+			{
+				formatType = new char[strlen(a_apstrArgs[iArg])+1];
+				strcpy(formatType,a_apstrArgs[iArg]);
+				if (strcmp(a_apstrArgs[iArg], "ETC1") == 0)
+				{
+					format = Image::Format::ETC1;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "RGB8") == 0)
+				{
+					format = Image::Format::RGB8;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "SRGB8") == 0)
+				{
+					format = Image::Format::SRGB8;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "RGBA8") == 0)
+				{
+					format = Image::Format::RGBA8;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "SRGBA8") == 0)
+				{
+					format = Image::Format::SRGBA8;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "R11") == 0)
+				{
+					format = Image::Format::R11;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "SIGNED_R11") == 0)
+				{
+					format = Image::Format::SIGNED_R11;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "RG11") == 0)
+				{
+					format = Image::Format::RG11;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "SIGNED_RG11") == 0)
+				{
+					format = Image::Format::SIGNED_RG11;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "RGB8A1") == 0)
+				{
+					format = Image::Format::RGB8A1;
+				}
+				else if (strcmp(a_apstrArgs[iArg], "SRGB8A1") == 0)
+				{
+					format = Image::Format::SRGB8A1;
+				}
+				else
+				{
+					printf("Error: unknown etc_format parameter for -format\n");
+					format = Image::Format::UNKNOWN;
+					return true;
+				}
+			}
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-help") == 0)
+		{
+			return true;
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-j") == 0 ||
+				 strcmp(a_apstrArgs[iArg], "-jobs") == 0)
+		{
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing job count for %s\n", a_apstrArgs[iArg - 1]);
+				return true;
+			}
+			else
+			{
+				unsigned int ui;
+				int iScans = sscanf(a_apstrArgs[iArg], "%u", &ui);
+
+				if (iScans != 1)
+				{
+					printf("Error: couldn't parse job count for %s (%s)\n", a_apstrArgs[iArg-1], a_apstrArgs[iArg]);
+					return true;
+				}
+				else
+				{
+					if (ui < MIN_JOBS)
+					{
+						ui = MIN_JOBS;
+					}
+
+					uiJobs = ui;
+				}
+			}
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-normalizexyz") == 0 ||
+				 strcmp(a_apstrArgs[iArg], "-normalizeXYZ") == 0)
+		{
+			boolNormalizeXYZ = true;
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-output") == 0)
+		{
+			++iArg;
+
+			if (iArg >= (a_iArgs))
+			{
+				printf("Error: missing encoded_image parameter for -output\n");
+				return true;
+			}
+			else
+			{
+				pstrOutputFilename = new char[strlen(a_apstrArgs[iArg]) + 1];
+				strcpy(pstrOutputFilename, a_apstrArgs[iArg]);
+				//take the output file name and extract the directory path so we can create the directory if necessary
+				char *ptrOutputDir = nullptr;
+
+				FixSlashes(pstrOutputFilename);
+				for (int c = (int)strlen(pstrOutputFilename); c > 0; c--)
+				{
+					//find the last slash, to get the name of the directory
+					if (pstrOutputFilename[c] == ETC_PATH_SLASH)
+					{
+						c++;
+						ptrOutputDir = new char[c + 1];	// c characters plus the terminator written below
+						strncpy(ptrOutputDir, pstrOutputFilename, c);
+						ptrOutputDir[c] = '\0';
+						CreateNewDir(ptrOutputDir);
+						break;
+					}
+				}
+
+				if (ptrOutputDir == nullptr)
+				{
+					printf("couldnt find a place to put converted images\n");
+					exit(1);
+				}
+			}
+		}
+		else if (strcmp(a_apstrArgs[iArg], "-verbose") == 0 ||
+			strcmp(a_apstrArgs[iArg], "-v") == 0)
+		{
+			verboseOutput = true;
+		}
+
+		else if (a_apstrArgs[iArg][0] == '-')
+        {
+			printf("Error: unknown option (%s)\n", a_apstrArgs[iArg]);
+			return true;
+		}
+		else if (a_apstrArgs[iArg][0] == '\r')
+        {
+			continue;
+		}
+        else
+        {
+			if (pstrSourceFilename != nullptr)
+			{
+				printf("Error: only support one source_image (%s)\n", a_apstrArgs[iArg]);
+				return true;
+			}
+
+			pstrSourceFilename = new char[strlen(a_apstrArgs[iArg])+1];
+			strcpy(pstrSourceFilename, a_apstrArgs[iArg]);
+        }
+    }
+
+	if (pstrSourceFilename == nullptr)
+	{
+		printf("Error: missing source_image\n");
+		return true;
+	}
+
+	if (pstrOutputFilename == nullptr)
+	{
+		printf("Error: missing -output encoded_image\n");
+		return true;
+	}
+
+	if (uiComparisons > 0 && pstrAnalysisDirectory == nullptr)
+	{
+		printf("Error: -compare is only valid with -analyze\n");
+		return true;
+	}
+
+	return false;
+}
+
+// ----------------------------------------------------------------------------------------------------
+// replaces every ETC_BAD_PATH_SLASH in a_pstr with ETC_PATH_SLASH, in place
+void Commands::FixSlashes(char *a_pstr)
+{
+	for (char *pcCursor = a_pstr; *pcCursor != '\0'; ++pcCursor)
+	{
+		// only the foreign separator is rewritten; all other characters stay as they are
+		if (*pcCursor != ETC_BAD_PATH_SLASH)
+		{
+			continue;
+		}
+		*pcCursor = ETC_PATH_SLASH;
+	}
+}
+
+// ----------------------------------------------------------------------------------------------------
+// print usage message and exit
+// the text lists the options handled by ProcessCommandLineArguments(); the process ends with exit status 1
+void Commands::PrintUsageMessage(void)
+{
+	printf("Usage: etctool.exe source_image [options ...] -output <output_file>\n");
+	printf("Options:\n");
+	printf("    -analyze <analysis_folder>\n");
+	printf("    -argfile <arg_file>           additional command line arguments\n");
+	printf("    -blockAtHV <H V>              encodes a single block that contains the\n");
+	printf("                                  pixel specified by the H V coordinates\n");
+	printf("    -compare <comparison_image>   compares source_image to comparison_image\n");
+	printf("    -effort <amount>              number between 0 and 100\n");
+	printf("    -errormetric <error_metric>   specify the error metric, the options are\n");
+	printf("                                  rgba, rec709, numeric and normalxyz\n");
+	printf("    -format <etc_format>          ETC1, RGB8, SRGB8, RGBA8, SRGBA8, RGB8A1,\n");
+	printf("                                  SRGB8A1, R11, SIGNED_R11, RG11 or SIGNED_RG11\n");
+	printf("    -help                         prints this message\n");
+	printf("    -jobs or -j <thread_count>    number of threads (default and minimum = %u)\n", MIN_JOBS);
+	printf("    -normalizexyz                 normalize RGB to have a length of 1\n");
+	printf("    -output <output_file>         encoded image file to write (required)\n");
+	printf("    -verbose or -v                shows status information during the encoding\n");
+	printf("                                  process\n");
+	printf("\n");
+	exit(1);
+}
+
+	// ----------------------------------------------------------------------------------------------------
+	// creates the directory "path" by running a shell command; prints an error and exits with status 1 on failure
+	void CreateNewDir(const char *path)
+	{
+		char strCommand[300];
+		// build the command with a bounded write so an over-long path cannot overflow strCommand
+#if ETC_WINDOWS
+		int iLength = sprintf_s(strCommand, "if not exist %s %s %s", path, ETC_MKDIR_COMMAND, path);
+#else
+		int iLength = snprintf(strCommand, sizeof(strCommand), "%s %s", ETC_MKDIR_COMMAND, path);
+#endif
+		// a negative or too-large length means the command did not fit and must not be executed
+		if (iLength < 0 || iLength >= (int)sizeof(strCommand) || system(strCommand) != 0)
+		{
+			printf("Error: couldn't create directory (%s)\n", path);
+			exit(1);
+		}
+
+	}
+
+	// ----------------------------------------------------------------------------------------------------
+	//
diff --git a/EtcTool/EtcTool.h b/EtcTool/EtcTool.h
new file mode 100644
index 0000000..67cfccd
--- /dev/null
+++ b/EtcTool/EtcTool.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2015 The Etc2Comp Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "EtcConfig.h"
+
+#if ETC_WINDOWS
+	const char ETC_PATH_SLASH = '\\';		// path separator for this platform
+	const char ETC_BAD_PATH_SLASH = '/';	// the other platform's separator
+	// shell command prefix that CreateNewDir() runs to make a directory
+	extern const char *ETC_MKDIR_COMMAND;
+	extern const char *ETC_IF_DIR_NOT_EXIST_COMMAND;	// NOTE(review): CreateNewDir() hard-codes "if not exist" rather than using this -- confirm it is still needed
+	// declared only for Windows builds -- presumably the platform lacks this POSIX function; TODO confirm where it is defined
+	int strcasecmp(const char *s1, const char *s2);
+#else
+	const char ETC_PATH_SLASH = '/';		// path separator for this platform
+	const char ETC_BAD_PATH_SLASH = '\\';	// the other platform's separator
+	// shell command prefix that CreateNewDir() runs to make a directory
+	extern const char *ETC_MKDIR_COMMAND;
+#endif
+	// runs a shell command built from ETC_MKDIR_COMMAND to create 'path'; prints an error and exits the process on failure
+	void CreateNewDir(const char *path);
diff --git a/LICENSE b/LICENSE
index d645695..75b5248 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,202 +1,202 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
index 20f0a04..3e657c4 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,180 @@
-# Etc2Comp - Texture to ETC2 compressor
-
-Etc2Comp is a command line tool that converts textures (e.g. bitmaps)
-into the ETC2 format. The tool is built with a focus on encoding performance
-to reduce the amount of time required to compile asset heavy applications as
-well as reduce overall application size.
-
-Important: This is not an official Google product. It is an experimental
-library published as-is.
+# Etc2Comp - Texture to ETC2 compressor
+
+Etc2Comp is a command line tool that converts textures (e.g. bitmaps)
+into the [ETC2](https://en.wikipedia.org/wiki/Ericsson_Texture_Compression)
+format. The tool is built with a focus on encoding performance
+to reduce the amount of time required to compile asset heavy applications as
+well as reduce overall application size.
+
+This repo provides source code that can be compiled into a binary. The
+binary can then be used to convert textures to the ETC2 format.
+
+Important: This is not an official Google product. It is an experimental
+library published as-is. Please see the CONTRIBUTING.md file for information
+about questions or issues.
+
+## Setup
+This project uses [CMake](https://cmake.org/) to generate platform-specific
+build files:
+ - Linux: make files
+ - OS X: Xcode workspace files
+ - Microsoft Windows: Visual Studio solution files
+ - Note: CMake supports other formats, but this doc only provides steps for
+ one of each platform for brevity.
+
+Refer to each platform's setup section to set up your environment and build
+an Etc2Comp binary. Then skip to the usage section of this page for examples
+of how to use the library.
+
+### Setup for OS X
+The OS X build was tested on this config:
+  OS X 10.9.5 i7 16GB RAM
+  Xcode 5.1.1
+  cmake 3.2.3
+  
+Start by downloading and installing the following components if they are not
+already installed on your development machine.
+ - *Xcode* version 5.1.1, or greater
+ - [CMake](https://cmake.org/download/) version 3.2.3, or greater
+
+To build the Etc2Comp binary:
+ 1. Open a *Terminal* window and navigate to the project directory.
+ 1. Run `mkdir build_xcode`
+ 1. Run `cd build_xcode`
+ 1. Run `cmake -G Xcode ../`
+ 1. Open *Xcode* and import the `build_xcode/EtcTest.xcodeproj` file.
+ 1. Open the Product menu and choose Build For -> Running.
+ 1. Once the build succeeds the binary located at `build_xcode/EtcTool/Debug/EtcTool`
+can be executed.
+
+Optional
+Xcode EtcTool ‘Run’ preferences
+note: if the build_xcode/EtcTest.xcodeproj is manually deleted then some Xcode preferences 
+will need to be set by hand after cmake is run (these prefs are retained across 
+cmake updates if the .xcodeproj is not deleted/removed)
+
+1. Set the active scheme to ‘EtcTool’
+1. Edit the scheme
+1. Select option ‘Run EtcTool’, then tab ‘Arguments’. 
+Add this launch argument: ‘-argfile ../../EtcTool/args.txt’
+1. Select tab ‘Options’ and set a custom working directory to: ‘$(SRCROOT)/Build_Xcode/EtcTool’
+
+### Setup for Windows
+
+1. Make a `build_vs2015` directory. 
+1. Invoke cmake to create the Visual Studio project.
+1. open the 'EtcTest' solution
+1. make the 'EtcTool' project the start up project 
+1. (optional) in the project properties, under 'Debugging ->command arguments' 
+add the argfile text file that is included in the EtcTool directory.
+example: -argfile C:\etc2\EtcTool\Args.txt
+
+### Setup for Linux
+The Linux build was tested on this config:
+  Ubuntu desktop 14.04
+  gcc/g++ 4.8
+  cmake 2.8.12.2
+
+1. Verify that Linux has cmake and a C++11-capable g++ installed
+1. Open shell
+1. Run `mkdir build_linux`
+1. Run `cd build_linux`
+1. Invoke cmake to create the make files.
+1. Run ‘make’
+1. navigate to the newly created EtcTool directory ‘cd EtcTool’
+1. run the executable: ‘./EtcTool -argfile ../../EtcTool/args.txt’
+
+Skip to the <a href="#usage">Usage</a> section for more information about using the
+tool.
+
+## Usage
+
+### Command Line Usage
+EtcTool can be run from the command line with the following usage:
+    etctool.exe source_image [options ...] -output encoded_image
+
+The encoder will use an array of RGBA floats read from the source_image to create 
+an ETC1 or ETC2 encoded image in encoded_image.  The RGBA floats should be in the 
+range [0:1].
+
+Options:
+
+    -analyze <analysis_folder>
+    -argfile <arg_file>           additional command line arguments read from a file
+    -blockAtHV <H V>              encodes a single block that contains the
+                                  pixel specified by the H V coordinates
+    -compare <comparison_image>   compares source_image to comparison_image
+    -effort <amount>              number between 0 and 100 to specify the encoding quality 
+                                  (100 is the highest quality)
+    -errormetric <error_metric>   specify the error metric, the options are
+                                  rgba, rec709, numeric and normalxyz
+    -format <etc_format>          ETC1, RGB8, SRGB8, RGBA8, SRGBA8, RGB8A1,
+                                  SRGB8A1 or R11
+    -help                         prints this message
+    -jobs or -j <thread_count>    specifies the number of threads (default=1)
+    -normalizexyz                 normalize RGB to have a length of 1
+    -verbose or -v                shows status information during the encoding
+                                  process
+
+* -analyze will run an analysis of the encoding and place it in folder 
+"analysis_folder" (e.g. ../analysis/kodim05).  Within the analysis_folder, a folder 
+will be created with a name of the current date/time (e.g. 20151204_153306).  This 
+date/time folder is used to compare encodings of the same texture over time.  
+Within the date/time folder is a text file with several encoding stats and a 2x png 
+image showing the encoding mode for each 4x4 block.
+
+* -argfile allows additional command line arguments to be placed in a text file
+
+* -blockAtHV selects the 4x4 pixel subset of the source image at position (H,V).  
+This is mainly used for debugging
+
+* -compare compares the source image to the created encoded image. The encoding
+will dictate what error analysis is used in the comparison.
+
+* -effort uses an "amount" between 0 and 100 to determine how much additional effort 
+to apply during the encoding.
+
+* -errormetric selects the fitting algorithm used by the encoder.  "rgba" calculates 
+RMS error using RGB components that are weighted by A.  "rec709" is similar to "rgba", 
+except the RGB components are also weighted according to Rec709.  "numeric" 
+calculates RMS error using unweighted RGBA components.  "normalxyz" calculates error 
+based on dot product and vector length for RGB and RMS error for A.
+
+* -help prints out the usage message
+
+* -jobs enables multi-threading to speed up image encoding
+
+* -normalizexyz normalizes the source RGB to have a length of 1.
+
+* -verbose shows information on the current encoding process. It will then display the 
+PSNR and the time it took to encode the image.
+
+Note: Path names can use slashes or backslashes.  The tool will convert the 
+slashes to the appropriate polarity for the current platform.
+
+
+## API
+
+The library supports two different APIs - a C-like API that is not heavily 
+class-based and a class-based API.
+
+main() in EtcTool.cpp contains an example of both APIs.
+
+The Encode() method now returns an EncodingStatus that contains bit flags for
+reporting various warnings and flags encountered when encoding.
+
+
+## Copyright
+Copyright 2015 Etc2Comp Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/third_party/lodepng/LICENSE b/third_party/lodepng/LICENSE
new file mode 100644
index 0000000..9382c4d
--- /dev/null
+++ b/third_party/lodepng/LICENSE
@@ -0,0 +1,25 @@
+LodePNG version 20160124
+
+Copyright (c) 2005-2016 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
diff --git a/third_party/lodepng/lodepng.cpp b/third_party/lodepng/lodepng.cpp
new file mode 100644
index 0000000..8be7f59
--- /dev/null
+++ b/third_party/lodepng/lodepng.cpp
@@ -0,0 +1,6168 @@
+/*
+LodePNG version 20160124
+
+Copyright (c) 2005-2016 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef LODEPNG_COMPILE_CPP
+#include <fstream>
+#endif /*LODEPNG_COMPILE_CPP*/
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+const char* LODEPNG_VERSION_STRING = "20160124";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above
+*/
+
+/*The malloc, realloc and free functions defined here with "lodepng_" in front
+of the name, so that you can easily change them to others related to your
+platform if needed. Everything else in the code calls these. Pass
+-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
+#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
+define them in your own project's source files without needing to change
+lodepng source code. Don't forget to remove "static" if you copypaste them
+from here.*/
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+static void* lodepng_malloc(size_t size) /*default allocation hook: forwards to the C library malloc*/
+{
+  return malloc(size);
+}
+
+static void* lodepng_realloc(void* ptr, size_t new_size) /*default reallocation hook: forwards to the C library realloc*/
+{
+  return realloc(ptr, new_size);
+}
+
+static void lodepng_free(void* ptr) /*default deallocation hook: forwards to the C library free*/
+{
+  free(ptr);
+}
+#else /*LODEPNG_COMPILE_ALLOCATORS*/
+void* lodepng_malloc(size_t size);
+void* lodepng_realloc(void* ptr, size_t new_size);
+void lodepng_free(void* ptr);
+#endif /*LODEPNG_COMPILE_ALLOCATORS*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // Tools for C, and common code for PNG and Zlib.                       // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Often in case of an error a value is assigned to a variable and then it breaks
+out of a loop (to go to the cleanup phase of a function). This macro does that.
+It makes the error handling code shorter and more readable. Because it expands
+to a "break" statement, it only has this effect inside a loop (or a switch).
+Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
+*/
+#define CERROR_BREAK(errorvar, code)\
+{\
+  errorvar = code;\
+  break;\
+}
+
+/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
+#define ERROR_BREAK(code) CERROR_BREAK(error, code)
+
+/*Set errorvar to the error code, and return that same code from the enclosing function.*/
+#define CERROR_RETURN_ERROR(errorvar, code)\
+{\
+  errorvar = code;\
+  return code;\
+}
+
+/*Evaluate the call once; if its result is a nonzero error code, return that code from the enclosing function.*/
+#define CERROR_TRY_RETURN(call)\
+{\
+  unsigned error = call;\
+  if(error) return error;\
+}
+
+/*Set errorvar to the error code, and return (without a value) from the enclosing void function.*/
+#define CERROR_RETURN(errorvar, code)\
+{\
+  errorvar = code;\
+  return;\
+}
+
+/*
+About uivector, ucvector and string:
+-All of them wrap dynamic arrays or text strings in a similar way.
+-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
+-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
+-They're not used in the interface, only internally in this file as static functions.
+-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
+*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*dynamic vector of unsigned ints*/
+typedef struct uivector
+{
+  unsigned* data; /*the elements; NULL after uivector_init or uivector_cleanup*/
+  size_t size; /*size in number of unsigned ints (elements, not bytes)*/
+  size_t allocsize; /*allocated size in bytes*/
+} uivector;
+
+static void uivector_cleanup(void* p) /*p must point to a uivector: frees its storage and leaves it empty*/
+{
+  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
+  lodepng_free(((uivector*)p)->data);
+  ((uivector*)p)->data = NULL; /*so the vector no longer refers to the freed memory*/
+}
+
+/*makes sure at least allocsize bytes are allocated. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_reserve(uivector* p, size_t allocsize)
+{
+  if(allocsize > p->allocsize)
+  {
+    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); /*requests below double the current allocation get half extra*/
+    void* data = lodepng_realloc(p->data, newsize);
+    if(data)
+    {
+      p->allocsize = newsize;
+      p->data = (unsigned*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  return 1;
+}
+
+/*sets the size in elements; added elements are not initialized here. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_resize(uivector* p, size_t size)
+{
+  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0; /*reserve works in bytes, size is in elements*/
+  p->size = size;
+  return 1; /*success*/
+}
+
+/*resize and give all new elements the value. returns 1 if success, 0 if the resize failed*/
+static unsigned uivector_resizev(uivector* p, size_t size, unsigned value)
+{
+  size_t oldsize = p->size, i;
+  if(!uivector_resize(p, size)) return 0;
+  for(i = oldsize; i < size; ++i) p->data[i] = value; /*only elements beyond the old size are written*/
+  return 1;
+}
+
+static void uivector_init(uivector* p) /*makes p an empty vector without storage; does not free anything*/
+{
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*appends c as new last element. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_push_back(uivector* p, unsigned c)
+{
+  if(!uivector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c; /*size was already incremented by the resize*/
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*dynamic vector of unsigned chars*/
+typedef struct ucvector
+{
+  unsigned char* data; /*the bytes; reallocated by ucvector_reserve when more room is needed*/
+  size_t size; /*used size in bytes*/
+  size_t allocsize; /*allocated size in bytes*/
+} ucvector;
+
+/*makes sure at least allocsize bytes are allocated. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_reserve(ucvector* p, size_t allocsize)
+{
+  if(allocsize > p->allocsize)
+  {
+    size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); /*requests below double the current allocation get half extra*/
+    void* data = lodepng_realloc(p->data, newsize);
+    if(data)
+    {
+      p->allocsize = newsize;
+      p->data = (unsigned char*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  return 1;
+}
+
+/*sets the used size in bytes; added bytes are not initialized here. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_resize(ucvector* p, size_t size)
+{
+  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
+  p->size = size;
+  return 1; /*success*/
+}
+
+#ifdef LODEPNG_COMPILE_PNG
+
+static void ucvector_cleanup(void* p) /*p must point to a ucvector: frees its storage and leaves it empty*/
+{
+  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
+  lodepng_free(((ucvector*)p)->data);
+  ((ucvector*)p)->data = NULL; /*so the vector no longer refers to the freed memory*/
+}
+
+static void ucvector_init(ucvector* p) /*makes p an empty vector without storage; does not free anything*/
+{
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*you can both convert from vector to buffer&size and vice versa. If you use
+init_buffer to take over a buffer and size, it is not needed to use cleanup*/
+static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size)
+{
+  p->data = buffer; /*the buffer is taken over as is, no copy is made*/
+  p->allocsize = p->size = size;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
+/*appends c as new last byte. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_push_back(ucvector* p, unsigned char c)
+{
+  if(!ucvector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c; /*size was already incremented by the resize*/
+  return 1;
+}
+#endif /*(defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*resizes *out to hold size chars plus a terminating 0. returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned string_resize(char** out, size_t size)
+{
+  char* data = (char*)lodepng_realloc(*out, size + 1); /*one extra char for the terminator*/
+  if(data)
+  {
+    data[size] = 0; /*null termination char*/
+    *out = data;
+  }
+  return data != 0;
+}
+
+/*init a char* for use as string: it becomes an empty string, or stays NULL if the allocation fails*/
+static void string_init(char** out)
+{
+  *out = NULL;
+  string_resize(out, 0); /*the result is not checked here*/
+}
+
+/*free the string again and set the pointer to NULL*/
+static void string_cleanup(char** out)
+{
+  lodepng_free(*out);
+  *out = NULL;
+}
+
+static void string_set(char** out, const char* in) /*makes *out a copy of the 0-terminated string in*/
+{
+  size_t insize = strlen(in), i;
+  if(string_resize(out, insize)) /*if the resize fails, *out is left unchanged*/
+  {
+    for(i = 0; i != insize; ++i)
+    {
+      (*out)[i] = in[i]; /*the terminating 0 was already written by string_resize*/
+    }
+  }
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+unsigned lodepng_read32bitInt(const unsigned char* buffer) /*reads buffer[0..3] as a 32-bit value, most significant byte first*/
+{ /*each byte is converted to unsigned before shifting, so that buffer[0] << 24 cannot overflow a signed int*/
+  return ((unsigned)buffer[0] << 24) | ((unsigned)buffer[1] << 16) | ((unsigned)buffer[2] << 8) | (unsigned)buffer[3];
+}
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
+/*writes value as 4 bytes, most significant byte first. buffer must have at least 4 allocated bytes available*/
+static void lodepng_set32bitInt(unsigned char* buffer, unsigned value)
+{
+  buffer[0] = (unsigned char)((value >> 24) & 0xff);
+  buffer[1] = (unsigned char)((value >> 16) & 0xff);
+  buffer[2] = (unsigned char)((value >>  8) & 0xff);
+  buffer[3] = (unsigned char)((value      ) & 0xff);
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+static void lodepng_add32bitInt(ucvector* buffer, unsigned value) /*appends value as 4 bytes, most significant byte first*/
+{
+  if(!ucvector_resize(buffer, buffer->size + 4)) return; /*resize failed: append nothing rather than write over existing bytes. todo: give error*/
+  lodepng_set32bitInt(&buffer->data[buffer->size - 4], value);
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / File IO                                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DISK
+
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename)
+{
+  FILE* file;
+  long size;
+
+  /*provide some proper output values if error will happen*/
+  *out = 0;
+  *outsize = 0;
+
+  file = fopen(filename, "rb");
+  if(!file) return 78;
+
+  /*get filesize; a failed seek or tell gives a negative size, which must not be used as allocation size*/
+  if(fseek(file , 0 , SEEK_END) != 0) size = -1;
+  else size = ftell(file);
+  rewind(file);
+  if(size < 0) { fclose(file); return 78; } /*same code as a file that cannot be opened for reading*/
+
+  /*read contents of the file into a newly allocated buffer, handed to the caller through *out*/
+  *out = (unsigned char*)lodepng_malloc((size_t)size);
+  if(size && (*out)) (*outsize) = fread(*out, 1, (size_t)size, file); /*outsize is the number of bytes actually read*/
+
+  fclose(file);
+  if(!(*out) && size) return 83; /*the above malloc failed*/
+  return 0;
+}
+
+/*write given buffer to the file, overwriting the file, it doesn't append to it. returns 0 if ok, 79 if it fails*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename)
+{
+  size_t written; /*number of bytes that fwrite reports as written*/
+  FILE* file = fopen(filename, "wb" );
+  if(!file) return 79;
+  written = fwrite((char*)buffer , 1 , buffersize, file);
+  if(fclose(file) != 0 || written != buffersize) return 79; /*file is incomplete: report it instead of returning success*/
+  return 0;
+}
+
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of common code and tools. Begin of Zlib related code.            // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+/*appends a single bit to the bitstream. TODO: this ignores potential out of memory errors*/
+#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\
+{\
+  /*at a byte boundary: add a new zero byte at the end*/\
+  if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\
+  /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\
+  (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\
+  ++(*bitpointer);\
+}
+
+static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
+{ /*appends the nbits lowest bits of value, least significant bit first*/
+  size_t i;
+  for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1));
+}
+
+static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits)
+{ /*appends the nbits lowest bits of value, most significant of those bits first*/
+  size_t i;
+  for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1));
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1) /*bit number bitpointer, counted from the least significant bit of byte 0; no bounds check*/
+
+static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream)
+{ /*returns the bit at *bitpointer (0 or 1) and advances *bitpointer by one*/
+  unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream));
+  ++(*bitpointer);
+  return result;
+}
+
+static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
+{ /*reads nbits bits and advances *bitpointer by nbits; the first bit read becomes the least significant bit of the result*/
+  unsigned result = 0, i;
+  for(i = 0; i != nbits; ++i)
+  {
+    result += ((unsigned)READBIT(*bitpointer, bitstream)) << i;
+    ++(*bitpointer);
+  }
+  return result;
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflate - Huffman                                                      / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#define FIRST_LENGTH_CODE_INDEX 257 /*first symbol that represents a length, see LENGTHBASE*/
+#define LAST_LENGTH_CODE_INDEX 285 /*last symbol that represents a length*/
+/*256 literals, the end code, some length codes, and 2 unused codes*/
+#define NUM_DEFLATE_CODE_SYMBOLS 288
+/*the distance codes have their own symbols, 30 used, 2 unused*/
+#define NUM_DISTANCE_SYMBOLS 32
+/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
+#define NUM_CODE_LENGTH_CODES 19
+
+/*the base lengths represented by codes 257-285 (entry 0 belongs to code 257)*/
+static const unsigned LENGTHBASE[29]
+  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+     67, 83, 99, 115, 131, 163, 195, 227, 258};
+
+/*the number of extra bits used by codes 257-285 (their value is added to the base length)*/
+static const unsigned LENGTHEXTRA[29]
+  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+      4,  4,  4,   4,   5,   5,   5,   5,   0};
+
+/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
+static const unsigned DISTANCEBASE[30]
+  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
+     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
+
+/*the number of extra bits of backwards distances (their value is added to the base distance)*/
+static const unsigned DISTANCEEXTRA[30]
+  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
+       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
+
+/*the order in which "code length alphabet code lengths" are stored, out of this
+the huffman tree of the dynamic huffman tree lengths is generated*/
+static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
+  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Huffman tree struct, containing multiple representations of the tree
+*/
+typedef struct HuffmanTree
+{
+  unsigned* tree2d; /*numcodes * 2 entries, filled in by HuffmanTree_make2DTree for use by the decoder*/
+  unsigned* tree1d; /*the code of each symbol, filled in by HuffmanTree_makeFromLengths2*/
+  unsigned* lengths; /*the lengths of the codes of the 1d-tree*/
+  unsigned maxbitlen; /*maximum number of bits a single code can get*/
+  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
+} HuffmanTree;
+
+/*function used for debug purposes to draw the tree in ascii art with C++*/
+/*
+static void HuffmanTree_draw(HuffmanTree* tree)
+{
+  std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl;
+  for(size_t i = 0; i != tree->tree1d.size; ++i)
+  {
+    if(tree->lengths.data[i])
+      std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl;
+  }
+  std::cout << std::endl;
+}*/
+
+static void HuffmanTree_init(HuffmanTree* tree) /*clears the three array pointers; maxbitlen and numcodes are not set here*/
+{
+  tree->tree2d = 0;
+  tree->tree1d = 0;
+  tree->lengths = 0;
+}
+
+static void HuffmanTree_cleanup(HuffmanTree* tree) /*frees the three arrays; the pointers themselves are not reset*/
+{
+  lodepng_free(tree->tree2d);
+  lodepng_free(tree->tree1d);
+  lodepng_free(tree->lengths);
+}
+
+/*builds tree2d, the tree representation used by the decoder, from tree1d and lengths. return value is error (83 or 55), 0 if ok*/
+static unsigned HuffmanTree_make2DTree(HuffmanTree* tree)
+{
+  unsigned nodefilled = 0; /*up to which node it is filled*/
+  unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/
+  unsigned n, i;
+
+  tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned));
+  if(!tree->tree2d) return 83; /*alloc fail*/
+
+  /*
+  convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means
+  uninited, a value >= numcodes is an address to another bit, a value < numcodes
+  is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as
+  many columns as codes - 1.
+  A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
+  Here, the internal nodes are stored (what their 0 and 1 option point to).
+  There is only memory for such good tree currently, if there are more nodes
+  (due to too long length codes), error 55 will happen
+  */
+  for(n = 0; n < tree->numcodes * 2; ++n)
+  {
+    tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/
+  }
+
+  for(n = 0; n < tree->numcodes; ++n) /*the codes*/
+  {
+    for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/
+    {
+      unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1); /*most significant bit of the code first*/
+      /*oversubscribed, see comment in lodepng_error_text*/
+      if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55;
+      if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/
+      {
+        if(i + 1 == tree->lengths[n]) /*last bit*/
+        {
+          tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/
+          treepos = 0; /*the next code starts at the root again*/
+        }
+        else
+        {
+          /*put address of the next step in here, first that address has to be found of course
+          (it's just nodefilled + 1)...*/
+          ++nodefilled;
+          /*addresses encoded with numcodes added to it*/
+          tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes;
+          treepos = nodefilled;
+        }
+      }
+      else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes; /*already filled in: continue from the value stored there, minus numcodes*/
+    }
+  }
+
+  for(n = 0; n < tree->numcodes * 2; ++n)
+  {
+    if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/
+  }
+
+  return 0;
+}
+
+/*
+Second step for the ...makeFromLengths and ...makeFromFrequencies functions.
+numcodes, lengths and maxbitlen must already be filled in correctly. Allocates
+and fills in tree1d, then builds tree2d. return value is error.
+*/
+static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree)
+{
+  uivector blcount; /*per code length: how many symbols have that length*/
+  uivector nextcode; /*per code length: the next code value to hand out*/
+  unsigned error = 0;
+  unsigned bits, n;
+
+  uivector_init(&blcount);
+  uivector_init(&nextcode);
+
+  tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
+  if(!tree->tree1d) error = 83; /*alloc fail*/
+
+  if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0)
+  || !uivector_resizev(&nextcode, tree->maxbitlen + 1, 0))
+    error = 83; /*alloc fail*/
+
+  if(!error)
+  {
+    /*step 1: count number of instances of each code length*/
+    for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]];
+    /*step 2: generate the nextcode values*/
+    for(bits = 1; bits <= tree->maxbitlen; ++bits)
+    {
+      nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1;
+    }
+    /*step 3: generate all the codes*/
+    for(n = 0; n != tree->numcodes; ++n)
+    {
+      if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++; /*symbols with length 0 get no code*/
+    }
+  }
+
+  uivector_cleanup(&blcount);
+  uivector_cleanup(&nextcode);
+
+  if(!error) return HuffmanTree_make2DTree(tree);
+  else return error;
+}
+
+/*
+given the code lengths (as stored in the PNG file), generate the tree as defined
+by Deflate. maxbitlen is the maximum bits that a code in the tree can have.
+bitlen is copied into the tree, it is not taken over. return value is error.
+*/
+static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
+                                            size_t numcodes, unsigned maxbitlen)
+{
+  unsigned i;
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i];
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  tree->maxbitlen = maxbitlen;
+  return HuffmanTree_makeFromLengths2(tree);
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
+Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
+
+/*chain node for boundary package merge*/
+typedef struct BPMNode
+{
+  int weight; /*the sum of all weights in this chain*/
+  unsigned index; /*index of this leaf node (called "count" in the paper)*/
+  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
+  int in_use; /*mark used by the garbage collection in bpmnode_create*/
+} BPMNode;
+
+/*lists of chains*/
+typedef struct BPMLists
+{
+  /*memory pool*/
+  unsigned memsize; /*number of nodes in memory*/
+  BPMNode* memory; /*the nodes themselves*/
+  unsigned numfree; /*number of valid entries in freelist*/
+  unsigned nextfree; /*index in freelist of the next node to hand out*/
+  BPMNode** freelist; /*pointers to the nodes that are available*/
+  /*two heads of lookahead chains per list*/
+  unsigned listsize; /*number of lists, so number of entries in chains0 and in chains1*/
+  BPMNode** chains0;
+  BPMNode** chains1;
+} BPMLists;
+
+/*creates a new chain node with the given parameters, from the memory in the lists */
+static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail)
+{
+  unsigned i;
+  BPMNode* result;
+
+  /*memory full, so garbage collect*/
+  if(lists->nextfree >= lists->numfree)
+  {
+    /*mark only those that are in use*/
+    for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0;
+    for(i = 0; i != lists->listsize; ++i)
+    {
+      BPMNode* node; /*walks both chains of list i, these are the nodes that are kept*/
+      for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1;
+      for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1;
+    }
+    /*collect those that are free*/
+    lists->numfree = 0;
+    for(i = 0; i != lists->memsize; ++i)
+    {
+      if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i];
+    }
+    lists->nextfree = 0;
+  }
+
+  result = lists->freelist[lists->nextfree++]; /*NOTE(review): assumes the collection above found at least one free node*/
+  result->weight = weight;
+  result->index = index;
+  result->tail = tail; /*in_use is not set here, it is recomputed at every garbage collection*/
+  return result;
+}
+
+static int bpmnode_compare(const void* a, const void* b) /*qsort comparator for BPMNode: ascending weight*/
+{
+  int wa = ((const BPMNode*)a)->weight;
+  int wb = ((const BPMNode*)b)->weight;
+  if(wa < wb) return -1;
+  if(wa > wb) return 1;
+  /*make the qsort a stable sort: equal weights are ordered by index, the smaller index compares as greater; never returns 0*/
+  return ((const BPMNode*)a)->index < ((const BPMNode*)b)->index ? 1 : -1;
+}
+
+/*Boundary Package Merge step, numpresent is the amount of leaves, c is the current chain, num is the caller's step counter.*/
+static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num)
+{
+  unsigned lastindex = lists->chains1[c]->index; /*the index stored in the newest node of list c*/
+
+  if(c == 0)
+  {
+    if(lastindex >= numpresent) return; /*no leaves left to add*/
+    lists->chains0[c] = lists->chains1[c];
+    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
+  }
+  else
+  {
+    /*sum of the weights of the head nodes of the previous lookahead chains.*/
+    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
+    lists->chains0[c] = lists->chains1[c];
+    if(lastindex < numpresent && sum > leaves[lastindex].weight)
+    {
+      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); /*the next leaf is lighter than the package*/
+      return;
+    }
+    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); /*the package: its tail is the chain of the previous list*/
+    /*in the end we are only interested in the chain of the last list, so no
+    need to recurse if we're at the last one (this gives measurable speedup)*/
+    if(num + 1 < (int)(2 * numpresent - 2))
+    {
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+    }
+  }
+}
+
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen)
+{ /*fills lengths[0..numcodes-1] with a code length per symbol, computed from frequencies. return value is error (80 or 83), 0 if ok*/
+  unsigned error = 0;
+  unsigned i;
+  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
+  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
+
+  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
+  if((1ull << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/
+
+  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
+  if(!leaves) return 83; /*alloc fail*/
+
+  for(i = 0; i != numcodes; ++i)
+  {
+    if(frequencies[i] > 0)
+    {
+      leaves[numpresent].weight = (int)frequencies[i];
+      leaves[numpresent].index = i; /*here index is the symbol, used below to find the entry in lengths*/
+      ++numpresent;
+    }
+  }
+
+  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
+
+  /*ensure at least two present symbols. There should be at least one symbol
+  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
+  make these work as well ensure there are at least two symbols. The
+  Package-Merge code below also doesn't work correctly if there's only one
+  symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit*/
+  if(numpresent == 0)
+  { /*NOTE(review): lengths[1] is written whatever numcodes is, so this assumes numcodes >= 2; the next branch does the same*/
+    lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
+  }
+  else if(numpresent == 1)
+  {
+    lengths[leaves[0].index] = 1;
+    lengths[leaves[0].index == 0 ? 1 : 0] = 1; /*second symbol: symbol 0, or symbol 1 if the present one is symbol 0*/
+  }
+  else
+  {
+    BPMLists lists;
+    BPMNode* node;
+
+    qsort(leaves, numpresent, sizeof(BPMNode), bpmnode_compare);
+
+    lists.listsize = maxbitlen;
+    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
+    lists.nextfree = 0;
+    lists.numfree = lists.memsize;
+    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
+    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
+    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
+
+    if(!error)
+    {
+      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
+
+      bpmnode_create(&lists, leaves[0].weight, 1, 0); /*becomes memory[0], as the freelist is in memory order*/
+      bpmnode_create(&lists, leaves[1].weight, 2, 0); /*becomes memory[1]*/
+
+      for(i = 0; i != lists.listsize; ++i)
+      {
+        lists.chains0[i] = &lists.memory[0];
+        lists.chains1[i] = &lists.memory[1];
+      }
+
+      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
+      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
+
+      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail)
+      {
+        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; /*each node in the chain adds one bit to its first node->index leaves*/
+      }
+    }
+
+    lodepng_free(lists.memory);
+    lodepng_free(lists.freelist);
+    lodepng_free(lists.chains0);
+    lodepng_free(lists.chains1);
+  }
+
+  lodepng_free(leaves);
+  return error;
+}
+
+/*Create the Huffman tree given the symbol frequencies. return value is error, 0 if ok*/
+static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
+                                                size_t mincodes, size_t numcodes, unsigned maxbitlen)
+{
+  unsigned error = 0;
+  while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes, but keep at least mincodes symbols*/
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  /*initialize all lengths to 0*/
+  memset(tree->lengths, 0, numcodes * sizeof(unsigned));
+
+  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
+  if(!error) error = HuffmanTree_makeFromLengths2(tree); /*makes the codes and the 2D tree from the lengths*/
+  return error;
+}
+
+static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) /*the code of symbol index; no bounds check*/
+{
+  return tree->tree1d[index];
+}
+
+static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) /*the code length of symbol index; no bounds check*/
+{
+  return tree->lengths[index];
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification. return value is error*/
+static unsigned generateFixedLitLenTree(HuffmanTree* tree)
+{
+  unsigned i, error = 0;
+  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); /*temporary, freed below*/
+  if(!bitlen) return 83; /*alloc fail*/
+
+  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
+  for(i =   0; i <= 143; ++i) bitlen[i] = 8;
+  for(i = 144; i <= 255; ++i) bitlen[i] = 9;
+  for(i = 256; i <= 279; ++i) bitlen[i] = 7;
+  for(i = 280; i <= 287; ++i) bitlen[i] = 8;
+
+  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); /*the tree gets its own copy of bitlen*/
+
+  lodepng_free(bitlen);
+  return error;
+}
+
+/*
+Build the distance code tree of a deflated block with fixed tree, as specified in the deflate specification.
+Returns 0 on success, 83 if the temporary table of code lengths cannot be allocated, otherwise the
+error code returned by HuffmanTree_makeFromLengths.
+*/
+static unsigned generateFixedDistanceTree(HuffmanTree* tree)
+{
+  unsigned symbol;
+  unsigned result;
+  unsigned* codelengths = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+  if(codelengths == 0) return 83; /*alloc fail*/
+
+  /*there are 32 distance codes, but 30-31 are unused; every one of them gets a 5 bit code*/
+  symbol = 0;
+  while(symbol != NUM_DISTANCE_SYMBOLS)
+  {
+    codelengths[symbol] = 5;
+    ++symbol;
+  }
+
+  result = HuffmanTree_makeFromLengths(tree, codelengths, NUM_DISTANCE_SYMBOLS, 15);
+
+  /*the table was only needed while building the tree*/
+  lodepng_free(codelengths);
+  return result;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decode one Huffman symbol by walking codetree->tree2d, consuming one input bit per step.
+in: the input buffer; bp: bit pointer into it, advanced by one for every bit consumed.
+inbitlength is the length of the complete buffer, in bits (so its byte length times 8).
+Returns the decoded symbol, or (unsigned)(-1) if an error happened: either the end of the input was
+reached before a symbol was complete, or the walk left the tree.
+*/
+static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp,
+                                    const HuffmanTree* codetree, size_t inbitlength)
+{
+  unsigned node = 0; /*current position in the tree*/
+
+  /*leaving the loop through its condition means: end of input memory reached without endcode*/
+  while(*bp < inbitlength)
+  {
+    /*
+    The "readBitFromStream" code is inlined in the expression below because
+    this is the biggest bottleneck while decoding
+    */
+    unsigned entry = codetree->tree2d[(node << 1) + READBIT(*bp, in)];
+    ++(*bp);
+
+    /*values below numcodes are symbols: decoding is done*/
+    if(entry < codetree->numcodes) return entry;
+
+    /*anything else is a tree position, stored with numcodes added to it*/
+    node = entry - codetree->numcodes;
+    if(node >= codetree->numcodes) break; /*error: it appeared outside the codetree*/
+  }
+
+  return (unsigned)(-1);
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Inflator (Decompressor)                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+get the tree of a deflated block with fixed tree, as specified in the deflate specification.
+tree_ll receives the literal/length tree, tree_d the distance tree.
+NOTE: both generate functions return an error code (83 on allocation failure) that is discarded here,
+and this function returns void, so callers are not told when a tree could not be built.
+*/
+static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d)
+{
+  /*TODO: check for out of memory errors*/
+  generateFixedLitLenTree(tree_ll);
+  generateFixedDistanceTree(tree_d);
+}
+
+/*
+get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree.
+tree_ll: receives the literal/length tree; tree_d: receives the distance tree.
+in, inlength: the input buffer and its length in bytes.
+bp: bit pointer into in, advanced past the bits that were read.
+Returns 0 on success, otherwise an error code: 49 or 50 when the header or a repeat count would run
+past the input, 83 on allocation failure, 54 for a "repeat previous" code at the very first position,
+13, 14 or 15 when a repeat code produces more lengths than HLIT + HDIST, 10 or 11 when decoding a
+symbol failed, 16 for an unexpected symbol, 64 when the end code 256 has length 0, or the error
+returned by HuffmanTree_makeFromLengths.
+*/
+static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
+                                      const unsigned char* in, size_t* bp, size_t inlength)
+{
+  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
+  unsigned error = 0;
+  unsigned n, HLIT, HDIST, HCLEN, i;
+  size_t inbitlength = inlength * 8;
+
+  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
+  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
+  unsigned* bitlen_d = 0; /*dist code lengths*/
+  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
+  unsigned* bitlen_cl = 0;
+  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
+
+  /*HLIT, HDIST and HCLEN take 5 + 5 + 4 = 14 bits together*/
+  if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/
+
+  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
+  HLIT =  readBitsFromStream(bp, in, 5) + 257;
+  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
+  HDIST = readBitsFromStream(bp, in, 5) + 1;
+  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
+  HCLEN = readBitsFromStream(bp, in, 4) + 4;
+
+  /*each of the HCLEN code length code lengths is stored in 3 bits*/
+  if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/
+
+  HuffmanTree_init(&tree_cl);
+
+  /*this loop runs at most once: it exists so that errors can break out to the shared cleanup below*/
+  while(!error)
+  {
+    /*read the code length codes out of 3 * (amount of code length codes) bits*/
+
+    bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
+    if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/);
+
+    /*the lengths are stored in the order given by CLCL_ORDER, not in symbol order*/
+    for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i)
+    {
+      if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3);
+      else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/
+    }
+
+    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
+    if(error) break;
+
+    /*now we can use this tree to read the lengths for the tree that this function will return*/
+    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
+    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
+
+    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
+    /*positions 0..HLIT-1 go to bitlen_ll, positions HLIT..HLIT+HDIST-1 go to bitlen_d*/
+    i = 0;
+    while(i < HLIT + HDIST)
+    {
+      unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength);
+      if(code <= 15) /*a length code*/
+      {
+        if(i < HLIT) bitlen_ll[i] = code;
+        else bitlen_d[i - HLIT] = code;
+        ++i;
+      }
+      else if(code == 16) /*repeat previous*/
+      {
+        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
+        unsigned value; /*set value to the previous code*/
+
+        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
+
+        if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+        replength += readBitsFromStream(bp, in, 2);
+
+        /*the previous length is the last lit/len entry when i is at most HLIT, else the previous dist entry*/
+        if(i < HLIT + 1) value = bitlen_ll[i - 1];
+        else value = bitlen_d[i - HLIT - 1];
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n)
+        {
+          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
+          if(i < HLIT) bitlen_ll[i] = value;
+          else bitlen_d[i - HLIT] = value;
+          ++i;
+        }
+      }
+      else if(code == 17) /*repeat "0" 3-10 times*/
+      {
+        unsigned replength = 3; /*read in the bits that indicate repeat length*/
+        if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+        replength += readBitsFromStream(bp, in, 3);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n)
+        {
+          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      }
+      else if(code == 18) /*repeat "0" 11-138 times*/
+      {
+        unsigned replength = 11; /*read in the bits that indicate repeat length*/
+        if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+        replength += readBitsFromStream(bp, in, 7);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n)
+        {
+          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      }
+      else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
+      {
+        if(code == (unsigned)(-1))
+        {
+          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+          (10=no endcode, 11=wrong jump outside of tree)*/
+          error = (*bp) > inbitlength ? 10 : 11;
+        }
+        else error = 16; /*unexisting code, this can never happen*/
+        break;
+      }
+    }
+    if(error) break;
+
+    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
+
+    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
+    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
+    if(error) break;
+    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
+
+    break; /*end of error-while*/
+  }
+
+  /*shared cleanup; the three pointers are still 0 if the matching allocation was never reached*/
+  lodepng_free(bitlen_cl);
+  lodepng_free(bitlen_ll);
+  lodepng_free(bitlen_d);
+  HuffmanTree_cleanup(&tree_cl);
+
+  return error;
+}
+
+/*
+inflate a block with dynamic or fixed Huffman tree.
+out: the output vector; it is resized as needed and decoded bytes are written starting at *pos.
+in, inlength: the input buffer and its length in bytes.
+bp: bit pointer into in, advanced past everything that was read.
+pos: byte position in out, advanced past everything that was written.
+btype: 1 to use the fixed trees, 2 to read the dynamic trees from the input.
+Returns 0 on success, otherwise an error code: the error of getTreeInflateDynamic, 10 or 11 when
+decoding a symbol failed, 18 for an invalid distance code, 51 when extra bits would run past the input,
+52 when a distance points before the start of the output, 83 on allocation failure.
+*/
+static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp,
+                                    size_t* pos, size_t inlength, unsigned btype)
+{
+  unsigned error = 0;
+  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
+  HuffmanTree tree_d; /*the huffman tree for distance codes*/
+  size_t inbitlength = inlength * 8;
+
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+
+  if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d);
+  else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength);
+
+  while(!error) /*decode all symbols until end reached, breaks at end code*/
+  {
+    /*code_ll is literal, length or end code*/
+    unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength);
+    if(code_ll <= 255) /*literal symbol*/
+    {
+      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
+      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
+      out->data[*pos] = (unsigned char)code_ll;
+      ++(*pos);
+    }
+    else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/
+    {
+      unsigned code_d, distance;
+      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
+      size_t start, forward, backward, length;
+
+      /*part 1: get length base*/
+      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
+
+      /*part 2: get extra bits and add the value of that to length*/
+      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
+      if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
+      length += readBitsFromStream(bp, in, numextrabits_l);
+
+      /*part 3: get distance code*/
+      code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength);
+      if(code_d > 29)
+      {
+        /*It is the distance symbol that has to be tested here. code_ll is a valid length code in this
+        branch, so testing it instead would report every failed distance decode as error 18*/
+        if(code_d == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
+        {
+          /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+          (10=no endcode, 11=wrong jump outside of tree)*/
+          error = (*bp) > inlength * 8 ? 10 : 11;
+        }
+        else error = 18; /*error: invalid distance code (30-31 are never used)*/
+        break;
+      }
+      distance = DISTANCEBASE[code_d];
+
+      /*part 4: get extra bits from distance*/
+      numextrabits_d = DISTANCEEXTRA[code_d];
+      if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/
+      distance += readBitsFromStream(bp, in, numextrabits_d);
+
+      /*part 5: fill in all the out[n] values based on the length and dist*/
+      start = (*pos);
+      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
+      backward = start - distance;
+
+      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
+      if (distance < length) {
+        /*source and destination overlap: copy byte by byte so that bytes written earlier get repeated*/
+        for(forward = 0; forward < length; ++forward)
+        {
+          out->data[(*pos)++] = out->data[backward++];
+        }
+      } else {
+        /*no overlap, so a single memcpy is allowed*/
+        memcpy(out->data + *pos, out->data + backward, length);
+        *pos += length;
+      }
+    }
+    else if(code_ll == 256)
+    {
+      break; /*end code, break the loop*/
+    }
+    else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/
+    {
+      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+      (10=no endcode, 11=wrong jump outside of tree)*/
+      error = ((*bp) > inlength * 8) ? 10 : 11;
+      break;
+    }
+  }
+
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+
+  return error;
+}
+
+/*
+Inflate a block without compression: skip to the next byte boundary, read LEN and NLEN, then copy
+LEN bytes from the input to the output.
+out: the output vector; it is resized as needed and the bytes are written starting at *pos.
+in, inlength: the input buffer and its length in bytes.
+bp: bit pointer into in; on success it is set to the first bit after the copied data.
+pos: byte position in out, advanced past the copied bytes.
+Returns 0 on success, 52 if LEN and NLEN do not fit in the input, 21 if NLEN is not the one's
+complement of LEN, 23 if the data would run past the input, 83 on allocation failure.
+*/
+static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength)
+{
+  size_t p;
+  unsigned LEN, NLEN, n, error = 0;
+
+  /*go to first boundary of byte*/
+  while(((*bp) & 0x7) != 0) ++(*bp);
+  p = (*bp) / 8; /*byte position*/
+
+  /*read LEN (2 bytes) and NLEN (2 bytes). The 4 bytes in[p] .. in[p + 3] may be the very last bytes
+  of the input (an empty final block), so only p + 4 > inlength is out of bounds*/
+  if(p + 4 > inlength) return 52; /*error, bit pointer will jump past memory*/
+  LEN = in[p] + 256u * in[p + 1]; p += 2;
+  NLEN = in[p] + 256u * in[p + 1]; p += 2;
+
+  /*check if 16-bit NLEN is really the one's complement of LEN*/
+  if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/
+
+  /*validate LEN against the input before growing the output, so truncated input allocates nothing*/
+  if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/
+
+  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
+
+  /*read the literal data: LEN bytes are now stored in the out buffer*/
+  for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++];
+
+  (*bp) = p * 8;
+
+  return error;
+}
+
+/*
+Inflate a complete deflate stream: decode block after block until the one marked as final is done.
+out: the output vector that receives the decoded bytes, starting at its position 0.
+in, insize: the compressed input and its size in bytes.
+settings: not used by this function.
+Returns 0 on success, 52 when a block header would run past the input, 20 for the invalid block
+type 3, otherwise the error code of the block decoder.
+*/
+static unsigned lodepng_inflatev(ucvector* out,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings)
+{
+  /*bit pointer in the "in" data, current byte is bp >> 3, current bit is bp & 0x7 (from lsb to msb of the byte)*/
+  size_t bp = 0;
+  size_t pos = 0; /*byte position in the out buffer*/
+  unsigned isfinal = 0;
+  unsigned result = 0;
+
+  (void)settings;
+
+  /*there is always at least one block, hence the test at the bottom*/
+  do
+  {
+    unsigned lowbit, highbit, blocktype;
+
+    /*a block header takes 3 bits: BFINAL followed by the two bits of BTYPE, least significant first*/
+    if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/
+    isfinal = readBitFromStream(&bp, in);
+    lowbit = readBitFromStream(&bp, in);
+    highbit = readBitFromStream(&bp, in);
+    blocktype = 1u * lowbit + 2u * highbit;
+
+    switch(blocktype)
+    {
+      case 3:
+        return 20; /*error: invalid BTYPE*/
+      case 0:
+        result = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/
+        break;
+      default:
+        result = inflateHuffmanBlock(out, in, &bp, &pos, insize, blocktype); /*compression, BTYPE 01 or 10*/
+        break;
+    }
+
+    if(result) return result;
+  } while(!isfinal);
+
+  return result;
+}
+
+/*
+Inflate the deflate data in "in" with the built-in decoder.
+out, outsize: on entry the buffer (and its size) handed to ucvector_init_buffer; on return they hold
+the data pointer and size of the output vector, also when an error is returned.
+in, insize: the compressed input and its size in bytes.
+settings: passed on to lodepng_inflatev.
+Returns the error code of lodepng_inflatev (0 on success).
+*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings)
+{
+  ucvector buffer;
+  unsigned result;
+
+  /*wrap the caller's buffer in a vector so that the decoder can grow it*/
+  ucvector_init_buffer(&buffer, *out, *outsize);
+  result = lodepng_inflatev(&buffer, in, insize, settings);
+
+  /*hand the (possibly moved and resized) buffer back to the caller*/
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return result;
+}
+
+/*
+Inflate with the decoder selected by the settings: settings->custom_inflate when it is set,
+lodepng_inflate otherwise. All arguments are passed through unchanged and the error code of the
+chosen decoder is returned.
+*/
+static unsigned inflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGDecompressSettings* settings)
+{
+  /*no custom decoder installed: use the built-in one*/
+  if(!settings->custom_inflate) return lodepng_inflate(out, outsize, in, insize, settings);
+
+  return settings->custom_inflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflator (Compressor)                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
+
+/*
+Append one Huffman code to the compressed stream by forwarding to addBitsToStreamReversed.
+bp: bit pointer into the compressed stream; compressed: the output vector.
+code: the code to write; bitlen is the size in bits of the code.
+*/
+static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen)
+{
+  addBitsToStreamReversed(bp, compressed, code, bitlen);
+}
+
+/*
+search the index in the array, that has the largest value smaller than or equal to the given value.
+The given array must be sorted and must not be empty. If even the first element is larger than the
+value, index 0 is returned.
+*/
+static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value)
+{
+  /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/
+  size_t lo = 1;
+  size_t hi = array_size - 1;
+
+  /*find the first index from 1 on whose element is not smaller than the value*/
+  while(lo <= hi)
+  {
+    size_t probe = (lo + hi) >> 1;
+    if(array[probe] < value) lo = probe + 1;
+    else hi = probe - 1;
+  }
+
+  /*an exact hit is the answer itself, otherwise it is the element just before*/
+  if(lo < array_size && array[lo] <= value) return lo;
+  return lo - 1;
+}
+
+/*
+Append one length/distance pair to the LZ77 encoded values, as four consecutive entries:
+the length symbol, the extra length value, the distance code and the extra distance value.
+*/
+static void addLengthDistance(uivector* values, size_t length, size_t distance)
+{
+  /*values in encoded vector are those used by deflate:
+  0-255: literal bytes
+  256: end
+  257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
+  286-287: invalid*/
+
+  /*index of the largest base that does not exceed the value, for length and distance each*/
+  const unsigned lenindex = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
+  const unsigned distindex = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
+
+  /*what remains above the base is what goes into the extra bits*/
+  const unsigned lenrest = (unsigned)(length - LENGTHBASE[lenindex]);
+  const unsigned distrest = (unsigned)(distance - DISTANCEBASE[distindex]);
+
+  uivector_push_back(values, lenindex + FIRST_LENGTH_CODE_INDEX);
+  uivector_push_back(values, lenrest);
+  uivector_push_back(values, distindex);
+  uivector_push_back(values, distrest);
+}
+
+/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
+bytes as input because 3 is the minimum match length for deflate*/
+static const unsigned HASH_NUM_VALUES = 65536;
+static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/
+
+/*
+The tables used by the LZ77 encoder to find earlier occurrences of the current data.
+hash_init allocates head with HASH_NUM_VALUES entries, headz with MAX_SUPPORTED_DEFLATE_LENGTH + 1
+entries, and val, chain, zeros and chainz with windowsize entries each.
+A head or headz entry of -1 means that no position is stored in it.
+*/
+typedef struct Hash
+{
+  int* head; /*hash value to head circular pos - can be outdated if went around window*/
+  /*circular pos to prev circular pos*/
+  unsigned short* chain;
+  int* val; /*circular pos to hash value*/
+
+  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
+  it's always going to be the zeros that dominate, so not important for PNG*/
+  int* headz; /*similar to head, but for chainz*/
+  unsigned short* chainz; /*those with same amount of zeros*/
+  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
+} Hash;
+
+/*
+Allocate and initialize the tables of the hash.
+hash: the structure to fill in; its pointers are overwritten without being freed first.
+windowsize: amount of entries of the tables that are indexed by circular position.
+Returns 0 on success. Returns 83 if any allocation failed; in that case everything that was
+allocated is released again and all six pointers are set to 0, so that nothing is leaked when the
+caller returns right away, and a later hash_cleanup is still safe.
+*/
+static unsigned hash_init(Hash* hash, unsigned windowsize)
+{
+  unsigned i;
+  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
+  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
+  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
+  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  if(!hash->head || !hash->chain || !hash->val  || !hash->headz|| !hash->chainz || !hash->zeros)
+  {
+    /*undo the allocations that did succeed*/
+    lodepng_free(hash->head);
+    lodepng_free(hash->val);
+    lodepng_free(hash->chain);
+    lodepng_free(hash->zeros);
+    lodepng_free(hash->headz);
+    lodepng_free(hash->chainz);
+    hash->head = 0;
+    hash->val = 0;
+    hash->chain = 0;
+    hash->zeros = 0;
+    hash->headz = 0;
+    hash->chainz = 0;
+    return 83; /*alloc fail*/
+  }
+
+  /*initialize hash table*/
+  for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->val[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/
+
+  for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/
+
+  return 0;
+}
+
+/*Release the six tables of the hash. The pointers themselves are left as they are.*/
+static void hash_cleanup(Hash* hash)
+{
+  /*tables indexed by hash value or by amount of zeros*/
+  lodepng_free(hash->head);
+  lodepng_free(hash->headz);
+
+  /*tables indexed by circular position*/
+  lodepng_free(hash->val);
+  lodepng_free(hash->chain);
+  lodepng_free(hash->zeros);
+  lodepng_free(hash->chainz);
+}
+
+
+
+/*
+Calculate the hash value of the data at position pos, masked with HASH_BIT_MASK.
+Three bytes are used when at least three are left, otherwise the one or two bytes that remain.
+Returns 0 when pos is not inside the data.
+*/
+static unsigned getHash(const unsigned char* data, size_t size, size_t pos)
+{
+  unsigned mixed = 0;
+  size_t remaining, k;
+
+  if(pos + 2 < size)
+  {
+    /*A simple shift and xor hash is used. Since the data of PNGs is dominated
+    by zeroes due to the filters, a better hash does not have a significant
+    effect on speed in traversing the chain, and causes more time spend on
+    calculating the hash.*/
+    mixed = (unsigned)(data[pos + 0] << 0u)
+          ^ (unsigned)(data[pos + 1] << 4u)
+          ^ (unsigned)(data[pos + 2] << 8u);
+    return mixed & HASH_BIT_MASK;
+  }
+
+  if(pos >= size) return 0;
+
+  /*near the end of the data: each byte that is left is shifted by a multiple of 8 bits*/
+  remaining = size - pos;
+  for(k = 0; k != remaining; ++k)
+  {
+    mixed ^= (unsigned)(data[pos + k] << (k * 8u));
+  }
+  return mixed & HASH_BIT_MASK;
+}
+
+/*
+Count the zero bytes in a row in data, starting at position pos. Counting stops at the first
+non-zero byte, at the end of the data, or after MAX_SUPPORTED_DEFLATE_LENGTH bytes.
+*/
+static unsigned countZeros(const unsigned char* data, size_t size, size_t pos)
+{
+  const unsigned char* const first = data + pos;
+  const unsigned char* stop = first + MAX_SUPPORTED_DEFLATE_LENGTH;
+  const unsigned char* cursor;
+
+  /*never look beyond the end of the data*/
+  if(stop > data + size) stop = data + size;
+
+  for(cursor = first; cursor != stop; ++cursor)
+  {
+    if(*cursor != 0) break;
+  }
+
+  /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/
+  return (unsigned)(cursor - first);
+}
+
+/*
+Store the circular position wpos (wpos = pos & (windowsize - 1)) in the hash: once under its hash
+value and once under its amount of zeros. In both cases wpos becomes the new head, and it is linked
+to the previous head if there was one.
+*/
+static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros)
+{
+  const int slot = (int)wpos;
+
+  /*the chain of positions that have the same hash value*/
+  hash->val[wpos] = (int)hashval;
+  if(hash->head[hashval] != -1)
+  {
+    hash->chain[wpos] = hash->head[hashval];
+  }
+  hash->head[hashval] = slot;
+
+  /*the chain of positions that have the same amount of zeros*/
+  hash->zeros[wpos] = numzeros;
+  if(hash->headz[numzeros] != -1)
+  {
+    hash->chainz[wpos] = hash->headz[numzeros];
+  }
+  hash->headz[numzeros] = slot;
+}
+
+/*
+LZ77-encode the data. Return value is error code. The input are raw bytes, the output
+is in the form of unsigned integers with codes representing for example literal bytes, or
+length/distance pairs.
+It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
+sliding window (of windowsize) is used, and all past bytes in that window can be used as
+the "dictionary". A brute force search through all possible distances would be slow, and
+this hash technique is one out of several ways to speed this up.
+
+out: receives one value per literal byte, and the four values written by addLengthDistance per match.
+hash: the hash tables; they are updated for every position that is passed.
+in, inpos, insize: the positions inpos up to (not including) insize of in get encoded.
+windowsize: size of the sliding window; must be a power of two, not 0 and not above 32768.
+minmatch: matches shorter than this are written as a literal instead.
+nicematch: the search at a position stops once a match of at least this length is found; values
+above MAX_SUPPORTED_DEFLATE_LENGTH are clamped to it.
+lazymatching: if not 0, a match is held back for one position to see whether the next position
+gives a longer one.
+Returns 0 on success, 60 or 90 for an invalid windowsize, 81 when a held back match is found at
+position 0, 83 on allocation failure, 86 when the offset of a match is larger than the window.
+*/
+static unsigned encodeLZ77(uivector* out, Hash* hash,
+                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
+                           unsigned minmatch, unsigned nicematch, unsigned lazymatching)
+{
+  size_t pos;
+  unsigned i, error = 0;
+  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
+  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8;
+  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
+
+  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
+  unsigned numzeros = 0;
+
+  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
+  unsigned length;
+  unsigned lazy = 0; /*1 while a match found at the previous position is being held back*/
+  unsigned lazylength = 0, lazyoffset = 0; /*length and offset of that held back match*/
+  unsigned hashval;
+  unsigned current_offset, current_length;
+  unsigned prev_offset;
+  const unsigned char *lastptr, *foreptr, *backptr;
+  unsigned hashpos;
+
+  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
+  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
+
+  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+  for(pos = inpos; pos < insize; ++pos)
+  {
+    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
+    unsigned chainlength = 0;
+
+    hashval = getHash(in, insize, pos);
+
+    /*numzeros is kept across positions: it is counted anew when it was 0, and otherwise lowered by
+    one when the streak counted earlier does not extend by one more byte*/
+    if(usezeros && hashval == 0)
+    {
+      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+    }
+    else
+    {
+      numzeros = 0;
+    }
+
+    updateHashChain(hash, wpos, hashval, numzeros);
+
+    /*the length and offset found for the current position*/
+    length = 0;
+    offset = 0;
+
+    hashpos = hash->chain[wpos];
+
+    /*a match may not run past the end of the input nor past the maximum length of deflate*/
+    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
+
+    /*search for the longest string*/
+    prev_offset = 0;
+    for(;;)
+    {
+      if(chainlength++ >= maxchainlength) break;
+      /*distance from hashpos back to wpos in the circular buffer*/
+      current_offset = hashpos <= wpos ? (unsigned int)(wpos - hashpos) : (unsigned int)(wpos - hashpos + windowsize);
+
+      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
+      prev_offset = current_offset;
+      if(current_offset > 0)
+      {
+        /*test the next characters*/
+        foreptr = &in[pos];
+        backptr = &in[pos - current_offset];
+
+        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
+        if(numzeros >= 3)
+        {
+          unsigned skip = hash->zeros[hashpos];
+          if(skip > numzeros) skip = numzeros;
+          backptr += skip;
+          foreptr += skip;
+        }
+
+        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/
+        {
+          ++backptr;
+          ++foreptr;
+        }
+        current_length = (unsigned)(foreptr - &in[pos]);
+
+        if(current_length > length)
+        {
+          length = current_length; /*the longest length*/
+          offset = current_offset; /*the offset that is related to this longest length*/
+          /*jump out once a length of max length is found (speed gain). This also jumps
+          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
+          if(current_length >= nicematch) break;
+        }
+      }
+
+      /*a chain entry equal to its own index marks the end of the chain (see hash_init)*/
+      if(hashpos == hash->chain[hashpos]) break;
+
+      /*follow the chain of equal zero counts once the best match is longer than the zero streak,
+      the chain of equal hash values otherwise*/
+      if(numzeros >= 3 && length > numzeros)
+      {
+        hashpos = hash->chainz[hashpos];
+        if(hash->zeros[hashpos] != numzeros) break;
+      }
+      else
+      {
+        hashpos = hash->chain[hashpos];
+        /*outdated hash value, happens if particular value was not encountered in whole last window*/
+        if(hash->val[hashpos] != (int)hashval) break;
+      }
+    }
+
+    if(lazymatching)
+    {
+      /*hold this match back and look at the next position first*/
+      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH)
+      {
+        lazy = 1;
+        lazylength = length;
+        lazyoffset = offset;
+        continue; /*try the next byte*/
+      }
+      if(lazy)
+      {
+        lazy = 0;
+        if(pos == 0) ERROR_BREAK(81);
+        if(length > lazylength + 1)
+        {
+          /*push the previous character as literal*/
+          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
+        }
+        else
+        {
+          /*the held back match is kept: go back one position and encode it from there*/
+          length = lazylength;
+          offset = lazyoffset;
+          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
+          hash->headz[numzeros] = -1; /*idem*/
+          --pos;
+        }
+      }
+    }
+    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
+
+    /*encode it as length/distance pair or literal value*/
+    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/
+    {
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    }
+    else if(length < minmatch || (length == 3 && offset > 4096))
+    {
+      /*compensate for the fact that longer offsets have more extra bits, a
+      length of only 3 may be not worth it then*/
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    }
+    else
+    {
+      addLengthDistance(out, length, offset);
+      /*the positions covered by the match are not searched, but they are still added to the hash*/
+      for(i = 1; i < length; ++i)
+      {
+        ++pos;
+        wpos = pos & (windowsize - 1);
+        hashval = getHash(in, insize, pos);
+        if(usezeros && hashval == 0)
+        {
+          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+        }
+        else
+        {
+          numzeros = 0;
+        }
+        updateHashChain(hash, wpos, hashval, numzeros);
+      }
+    }
+  } /*end of the loop through each character of input*/
+
+  return error;
+}
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*
+Write the data as deflate blocks without compression (BTYPE 0), each holding at most 65535 bytes.
+out: the output vector the blocks are appended to.
+data, datasize: the bytes to store and their amount.
+Empty input gives a single empty final block, so the output is always a complete deflate stream.
+Returns 0 on success, 83 if the output could not be grown.
+*/
+static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize)
+{
+  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
+  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
+
+  size_t i, j, numdeflateblocks = (datasize + 65534) / 65535;
+  size_t datapos = 0; /*size_t like datasize, so that large inputs are not truncated*/
+
+  /*without this, empty input would produce no block at all, and so no block marked as final*/
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  for(i = 0; i != numdeflateblocks; ++i)
+  {
+    unsigned BFINAL, BTYPE, LEN, NLEN;
+    unsigned char firstbyte;
+
+    BFINAL = (i == numdeflateblocks - 1);
+    BTYPE = 0;
+
+    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1));
+    if(!ucvector_push_back(out, firstbyte)) return 83; /*alloc fail*/
+
+    LEN = 65535;
+    if(datasize - datapos < 65535) LEN = (unsigned)(datasize - datapos);
+    NLEN = 65535 - LEN; /*the one's complement of the 16-bit LEN*/
+
+    /*LEN and NLEN are both written least significant byte first*/
+    if(!ucvector_push_back(out, (unsigned char)(LEN & 255))) return 83; /*alloc fail*/
+    if(!ucvector_push_back(out, (unsigned char)(LEN >> 8))) return 83; /*alloc fail*/
+    if(!ucvector_push_back(out, (unsigned char)(NLEN & 255))) return 83; /*alloc fail*/
+    if(!ucvector_push_back(out, (unsigned char)(NLEN >> 8))) return 83; /*alloc fail*/
+
+    /*Decompressed data*/
+    for(j = 0; j < 65535 && datapos < datasize; ++j)
+    {
+      if(!ucvector_push_back(out, data[datapos++])) return 83; /*alloc fail*/
+    }
+  }
+
+  return 0;
+}
+
+/*
+write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman trees.
+bp: bit pointer into the compressed stream; out: the compressed stream.
+lz77_encoded: the values to write. A value above 256 is a length code and is followed by three
+more values: the extra length bits, the distance code and the extra distance bits.
+tree_ll: the tree for lit and len codes.
+tree_d: the tree for distance codes.
+*/
+static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded,
+                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d)
+{
+  size_t idx = 0;
+  while(idx != lz77_encoded->size)
+  {
+    unsigned symbol = lz77_encoded->data[idx];
+
+    /*every value starts with a symbol of the lit/len tree*/
+    addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, symbol), HuffmanTree_getLength(tree_ll, symbol));
+
+    if(symbol > 256) /*for a length code, 3 more things have to be added*/
+    {
+      unsigned lenextravalue = lz77_encoded->data[idx + 1];
+      unsigned distsymbol = lz77_encoded->data[idx + 2];
+      unsigned distextravalue = lz77_encoded->data[idx + 3];
+      unsigned lenextracount = LENGTHEXTRA[symbol - FIRST_LENGTH_CODE_INDEX];
+      unsigned distextracount = DISTANCEEXTRA[distsymbol];
+      idx += 3; /*the three values that were just read*/
+
+      addBitsToStream(bp, out, lenextravalue, lenextracount);
+      addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, distsymbol),
+                       HuffmanTree_getLength(tree_d, distsymbol));
+      addBitsToStream(bp, out, distextravalue, distextracount);
+    }
+
+    ++idx;
+  }
+}
+
+/*
+Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees.
+out, bp: output byte vector and the bit position at which writing continues.
+hash: passed through to encodeLZ77 (only used when settings->use_lz77 is set).
+data, datapos, dataend: the block covers data[datapos] up to, not including, data[dataend].
+settings: use_lz77, windowsize, minmatch, nicematch and lazymatching are read here.
+final: written as the BFINAL bit of the block header.
+Returns 0 on success, else an error code: 83 for allocation failure, 64 when the end code got
+length 0, or whatever encodeLZ77 / HuffmanTree_makeFromFrequencies reported.
+*/
+static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash,
+                               const unsigned char* data, size_t datapos, size_t dataend,
+                               const LodePNGCompressSettings* settings, unsigned final)
+{
+  unsigned error = 0;
+
+  /*
+  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
+  literal bytes and length/distance pairs. This is then huffman compressed with
+  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
+  another huffman tree is used for the dist values ("d"). These two trees are
+  stored using their code lengths, and to compress even more these code lengths
+  are also run-length encoded and huffman compressed. This gives a huffman tree
+  of code lengths "cl". The code lengths used to describe this third tree are
+  the code length code lengths ("clcl").
+  */
+
+  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
+  uivector lz77_encoded;
+  HuffmanTree tree_ll; /*tree for lit,len values*/
+  HuffmanTree tree_d; /*tree for distance codes*/
+  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
+  uivector frequencies_ll; /*frequency of lit,len codes*/
+  uivector frequencies_d; /*frequency of dist codes*/
+  uivector frequencies_cl; /*frequency of code length codes*/
+  uivector bitlen_lld; /*lit,len,dist code lengths (in bits), literally (without repeat codes).*/
+  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
+  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
+  (these are written as is in the file, it would be crazy to compress these using yet another huffman
+  tree that needs to be represented by yet another set of code lengths)*/
+  uivector bitlen_cl;
+  size_t datasize = dataend - datapos;
+
+  /*
+  Due to the huffman compression of huffman tree representations ("two levels"), there are some analogies:
+  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
+  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
+  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
+  */
+
+  unsigned BFINAL = final;
+  size_t numcodes_ll, numcodes_d, i;
+  unsigned HLIT, HDIST, HCLEN;
+
+  /*initialize everything up front so the cleanup section at the end is valid on every exit path*/
+  uivector_init(&lz77_encoded);
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+  HuffmanTree_init(&tree_cl);
+  uivector_init(&frequencies_ll);
+  uivector_init(&frequencies_d);
+  uivector_init(&frequencies_cl);
+  uivector_init(&bitlen_lld);
+  uivector_init(&bitlen_lld_e);
+  uivector_init(&bitlen_cl);
+
+  /*This while loop never loops due to a break at the end, it is here to
+  allow breaking out of it to the cleanup phase on error conditions.*/
+  while(!error)
+  {
+    if(settings->use_lz77)
+    {
+      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(error) break;
+    }
+    else
+    {
+      /*without LZ77 every input byte is copied as one literal symbol*/
+      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
+      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
+    }
+
+    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
+    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
+
+    /*Count the frequencies of lit, len and dist codes*/
+    for(i = 0; i != lz77_encoded.size; ++i)
+    {
+      unsigned symbol = lz77_encoded.data[i];
+      ++frequencies_ll.data[symbol];
+      if(symbol > 256)
+      {
+        /*a length code is followed by: length extra bits, distance code, distance extra bits.
+        Only the distance code (at i + 2) is counted; the three values are then skipped.*/
+        unsigned dist = lz77_encoded.data[i + 2];
+        ++frequencies_d.data[dist];
+        i += 3;
+      }
+    }
+    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
+
+    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
+    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
+    if(error) break;
+    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
+    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
+    if(error) break;
+
+    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
+    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
+    /*store the code lengths of both generated trees in bitlen_lld*/
+    /*NOTE(review): the result of uivector_push_back is not checked in this function; if it can
+    fail on allocation, that failure goes unnoticed here - confirm*/
+    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
+    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
+
+    /*run-length compress bitlen_lld into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
+    17 (3-10 zeroes), 18 (11-138 zeroes)*/
+    for(i = 0; i != (unsigned)bitlen_lld.size; ++i)
+    {
+      unsigned j = 0; /*amount of repetitions*/
+      /*count how many of the following entries equal the current one*/
+      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
+
+      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/
+      {
+        ++j; /*include the first zero*/
+        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/
+        {
+          uivector_push_back(&bitlen_lld_e, 17);
+          uivector_push_back(&bitlen_lld_e, j - 3);
+        }
+        else /*repeat code 18 supports max 138 zeroes*/
+        {
+          if(j > 138) j = 138;
+          uivector_push_back(&bitlen_lld_e, 18);
+          uivector_push_back(&bitlen_lld_e, j - 11);
+        }
+        i += (j - 1); /*skip the zeroes covered by the repeat code (the loop's ++i skips one more)*/
+      }
+      else if(j >= 3) /*repeat code for value other than zero*/
+      {
+        size_t k;
+        unsigned num = j / 6, rest = j % 6;
+        /*the value itself is written once, then repeated with code 16 in groups of 6 plus a remainder*/
+        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+        for(k = 0; k < num; ++k)
+        {
+          uivector_push_back(&bitlen_lld_e, 16);
+          uivector_push_back(&bitlen_lld_e, 6 - 3);
+        }
+        if(rest >= 3)
+        {
+          uivector_push_back(&bitlen_lld_e, 16);
+          uivector_push_back(&bitlen_lld_e, rest - 3);
+        }
+        else j -= rest; /*a remainder below 3 is left for the following loop iterations*/
+        i += j;
+      }
+      else /*too short to benefit from repeat code*/
+      {
+        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+      }
+    }
+
+    /*generate tree_cl, the huffmantree of huffmantrees*/
+
+    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != bitlen_lld_e.size; ++i)
+    {
+      ++frequencies_cl.data[bitlen_lld_e.data[i]];
+      /*after a repeat code come the bits that specify the number of repetitions,
+      those don't need to be in the frequencies_cl calculation*/
+      if(bitlen_lld_e.data[i] >= 16) ++i;
+    }
+
+    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
+                                            frequencies_cl.size, frequencies_cl.size, 7);
+    if(error) break;
+
+    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != tree_cl.numcodes; ++i)
+    {
+      /*lengths of code length tree is in the order as specified by deflate*/
+      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
+    }
+    /*NOTE(review): data[size - 1] is read before the size > 4 test; presumably safe because
+    tree_cl.numcodes is never 0 at this point - confirm*/
+    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4)
+    {
+      /*remove zeros at the end, but minimum size must be 4*/
+      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
+    }
+    if(error) break;
+
+    /*
+    Write everything into the output
+
+    After the BFINAL and BTYPE, the dynamic block consists out of the following:
+    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
+    - (HCLEN+4)*3 bits code lengths of code length alphabet
+    - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - compressed data
+    - 256 (end code)
+    */
+
+    /*Write block type*/
+    addBitToStream(bp, out, BFINAL);
+    addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/
+    addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/
+
+    /*write the HLIT, HDIST and HCLEN values*/
+    HLIT = (unsigned)(numcodes_ll - 257);
+    HDIST = (unsigned)(numcodes_d - 1);
+    HCLEN = (unsigned)bitlen_cl.size - 4;
+    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
+    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
+    addBitsToStream(bp, out, HLIT, 5);
+    addBitsToStream(bp, out, HDIST, 5);
+    addBitsToStream(bp, out, HCLEN, 4);
+
+    /*write the code lengths of the code length alphabet*/
+    for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3);
+
+    /*write the lengths of the lit/len AND the dist alphabet*/
+    for(i = 0; i != bitlen_lld_e.size; ++i)
+    {
+      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
+                       HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
+      /*extra bits of repeat codes*/
+      if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2);
+      else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3);
+      else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7);
+    }
+
+    /*write the compressed data symbols*/
+    writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d);
+    /*error: the length of the end code 256 must be larger than 0
+    (note that this is only checked after the data symbols have already been written)*/
+    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
+
+    /*write the end code*/
+    addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
+
+    break; /*end of error-while*/
+  }
+
+  /*cleanup*/
+  uivector_cleanup(&lz77_encoded);
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+  HuffmanTree_cleanup(&tree_cl);
+  uivector_cleanup(&frequencies_ll);
+  uivector_cleanup(&frequencies_d);
+  uivector_cleanup(&frequencies_cl);
+  uivector_cleanup(&bitlen_lld_e);
+  uivector_cleanup(&bitlen_lld);
+  uivector_cleanup(&bitlen_cl);
+
+  return error;
+}
+
+/*
+Deflate for a block of type "fixed": data[datapos] up to, not including, data[dataend] is
+written with the trees produced by generateFixedLitLenTree and generateFixedDistanceTree.
+final is written as the BFINAL bit. With settings->use_lz77 the data first goes through
+encodeLZ77, otherwise every byte is written as a literal.
+Returns 0 on success, else the error reported by encodeLZ77.
+*/
+static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash,
+                             const unsigned char* data,
+                             size_t datapos, size_t dataend,
+                             const LodePNGCompressSettings* settings, unsigned final)
+{
+  HuffmanTree fixed_ll; /*tree for literal values and length codes*/
+  HuffmanTree fixed_d; /*tree for distance codes*/
+  unsigned result = 0;
+  unsigned last_block_bit = final;
+
+  HuffmanTree_init(&fixed_ll);
+  HuffmanTree_init(&fixed_d);
+
+  generateFixedLitLenTree(&fixed_ll);
+  generateFixedDistanceTree(&fixed_d);
+
+  /*block header: BFINAL, then the two BTYPE bits*/
+  addBitToStream(bp, out, last_block_bit);
+  addBitToStream(bp, out, 1); /*first bit of BTYPE*/
+  addBitToStream(bp, out, 0); /*second bit of BTYPE*/
+
+  if(!settings->use_lz77)
+  {
+    /*no LZ77, but still will be Huffman compressed: one literal symbol per input byte*/
+    size_t pos;
+    for(pos = datapos; pos < dataend; ++pos)
+    {
+      addHuffmanSymbol(bp, out, HuffmanTree_getCode(&fixed_ll, data[pos]), HuffmanTree_getLength(&fixed_ll, data[pos]));
+    }
+  }
+  else
+  {
+    /*LZ77 encoded: build the symbol stream first, write it only if encoding succeeded*/
+    uivector symbols;
+    uivector_init(&symbols);
+    result = encodeLZ77(&symbols, hash, data, datapos, dataend, settings->windowsize,
+                        settings->minmatch, settings->nicematch, settings->lazymatching);
+    if(!result) writeLZ77data(bp, out, &symbols, &fixed_ll, &fixed_d);
+    uivector_cleanup(&symbols);
+  }
+
+  /*the END code closes the block, unless an error occurred*/
+  if(!result) addHuffmanSymbol(bp, out, HuffmanTree_getCode(&fixed_ll, 256), HuffmanTree_getLength(&fixed_ll, 256));
+
+  /*cleanup*/
+  HuffmanTree_cleanup(&fixed_ll);
+  HuffmanTree_cleanup(&fixed_d);
+
+  return result;
+}
+
+/*
+Deflate the input into 'out' as one or more deflate blocks.
+settings->btype selects the block type: 0 delegates everything to deflateNoCompression,
+1 writes a single block through deflateFixed, 2 splits the input into blocks of
+65536 to 262144 bytes that go through deflateDynamic. Any other btype returns 61.
+Returns 0 on success, else the first error reported by hash_init or a block encoder.
+*/
+static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
+                                 const LodePNGCompressSettings* settings)
+{
+  unsigned error = 0;
+  size_t i, blocksize, numdeflateblocks;
+  size_t bp = 0; /*the bit pointer*/
+  Hash hash;
+
+  if(settings->btype > 2) return 61;
+  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
+  else if(settings->btype == 1) blocksize = insize;
+  else /*if(settings->btype == 2)*/
+  {
+    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
+    blocksize = insize / 8 + 8;
+    if(blocksize < 65536) blocksize = 65536;
+    if(blocksize > 262144) blocksize = 262144;
+  }
+
+  /*btype 1 with empty input gives blocksize 0, which would make the division below a
+  division by zero. With blocksize 1 the block count still becomes 1 and that single
+  block is empty (start and end are both clamped to 0).*/
+  if(blocksize == 0) blocksize = 1;
+
+  numdeflateblocks = (insize + blocksize - 1) / blocksize;
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  error = hash_init(&hash, settings->windowsize);
+  if(error) return error;
+
+  for(i = 0; i != numdeflateblocks && !error; ++i)
+  {
+    unsigned final = (i == numdeflateblocks - 1); /*only the last block gets BFINAL set*/
+    size_t start = i * blocksize;
+    size_t end = start + blocksize;
+    if(end > insize) end = insize;
+
+    if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final);
+    else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final);
+  }
+
+  hash_cleanup(&hash);
+
+  return error;
+}
+
+/*
+Deflate 'in' and append the result to the buffer described by *out and *outsize.
+The buffer is wrapped in a ucvector for the duration of the call; *out and *outsize are
+written back from that vector afterwards, whether or not an error occurred.
+Returns the error code of lodepng_deflatev (0 means success).
+*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings)
+{
+  ucvector buffer;
+  unsigned result;
+
+  ucvector_init_buffer(&buffer, *out, *outsize);
+  result = lodepng_deflatev(&buffer, in, insize, settings);
+
+  /*hand the vector's storage back to the caller*/
+  *outsize = buffer.size;
+  *out = buffer.data;
+  return result;
+}
+
+/* compress using the custom deflate function when one is set, else with lodepng_deflate */
+static unsigned deflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGCompressSettings* settings)
+{
+  if(!settings->custom_deflate)
+  {
+    return lodepng_deflate(out, outsize, in, insize, settings);
+  }
+  return settings->custom_deflate(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Adler32                                                                  */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Continue an Adler-32 checksum: 'adler' is the checksum so far (low 16 bits are the first
+sum, high 16 bits the second), data[0..len-1] are the bytes to add.
+Returns the updated checksum in the same layout.
+*/
+static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len)
+{
+  unsigned low = adler & 0xffff;
+  unsigned high = (adler >> 16) & 0xffff;
+  unsigned remaining = len;
+
+  while(remaining != 0)
+  {
+    /*at least 5550 sums can be done before the sums overflow, saving a lot of modulo divisions*/
+    unsigned batch = remaining;
+    if(batch > 5550) batch = 5550;
+    remaining -= batch;
+
+    for(; batch != 0; --batch)
+    {
+      low += *data;
+      ++data;
+      high += low;
+    }
+
+    low %= 65521;
+    high %= 65521;
+  }
+
+  return (high << 16) | low;
+}
+
+/*Adler-32 of the bytes data[0..len-1]: update_adler32 run from the start value 1*/
+static unsigned adler32(const unsigned char* data, unsigned len)
+{
+  unsigned checksum = update_adler32(1L, data, len);
+  return checksum;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Zlib                                                                   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+Decompress a zlib stream: 2 header bytes, deflate data, 4 byte ADLER32 of the decompressed data.
+out, outsize: output buffer and its size, passed on to inflate.
+in, insize: the complete zlib stream.
+settings: ignore_adler32 skips the checksum comparison.
+Returns 0 on success, else an error code:
+  53: stream too small to hold the header (or, when the checksum is verified, the trailer)
+  24: the two header bytes are not a multiple of 31
+  25: compression method is not 8, or CINFO is above 7
+  26: a preset dictionary is requested
+  58: the ADLER32 checksum does not match
+  or whatever inflate reported.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                 size_t insize, const LodePNGDecompressSettings* settings)
+{
+  unsigned error = 0;
+  unsigned CM, CINFO, FDICT;
+
+  if(insize < 2) return 53; /*error, size of zlib data too small*/
+  /*read information from zlib header*/
+  if((in[0] * 256 + in[1]) % 31 != 0)
+  {
+    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
+    return 24;
+  }
+
+  CM = in[0] & 15;
+  CINFO = (in[0] >> 4) & 15;
+  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
+  FDICT = (in[1] >> 5) & 1;
+  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
+
+  if(CM != 8 || CINFO > 7)
+  {
+    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
+    return 25;
+  }
+  if(FDICT != 0)
+  {
+    /*error: the specification of PNG says about the zlib stream:
+      "The additional flags shall not specify a preset dictionary."*/
+    return 26;
+  }
+
+  error = inflate(out, outsize, in + 2, insize - 2, settings);
+  if(error) return error;
+
+  if(!settings->ignore_adler32)
+  {
+    unsigned ADLER32, checksum;
+    /*the 4 checksum bytes must follow the 2 header bytes; with fewer than 6 bytes the read
+    below would overlap the header, or start before 'in' when insize is below 4*/
+    if(insize < 6) return 53; /*error, size of zlib data too small*/
+    ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
+    checksum = adler32(*out, (unsigned)(*outsize));
+    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
+  }
+
+  return 0; /*no error*/
+}
+
+/* decompress using the custom zlib function when one is set, else with lodepng_zlib_decompress */
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings)
+{
+  if(!settings->custom_zlib)
+  {
+    return lodepng_zlib_decompress(out, outsize, in, insize, settings);
+  }
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*
+Compress 'in' into a zlib stream appended to the buffer described by *out and *outsize:
+2 header bytes, the deflate data, then the 4 byte ADLER32 of the uncompressed input.
+settings is passed on to deflate. Returns 0 on success, else the error reported by deflate.
+On error only the 2 header bytes have been appended.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                               size_t insize, const LodePNGCompressSettings* settings)
+{
+  /*initially, *out must be NULL and outsize 0, if you just give some random *out
+  that's pointing to a non allocated buffer, this'll crash*/
+  ucvector outv;
+  size_t i;
+  unsigned error;
+  unsigned char* deflatedata = 0;
+  size_t deflatesize = 0;
+
+  /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
+  unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
+  unsigned FLEVEL = 0;
+  unsigned FDICT = 0;
+  unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
+  unsigned FCHECK = 31 - CMFFLG % 31; /*makes the 16-bit header value a multiple of 31*/
+  CMFFLG += FCHECK;
+
+  /*ucvector-controlled version of the output buffer, for dynamic array*/
+  ucvector_init_buffer(&outv, *out, *outsize);
+
+  ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8));
+  ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255));
+
+  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
+
+  if(!error)
+  {
+    unsigned ADLER32 = adler32(in, (unsigned)insize);
+    for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]);
+    lodepng_add32bitInt(&outv, ADLER32);
+  }
+
+  /*release the temporary deflate buffer on every path: lodepng_deflate hands its buffer
+  back even when it reports an error, so freeing only on success would leak it*/
+  if(deflatedata) lodepng_free(deflatedata);
+
+  *out = outv.data;
+  *outsize = outv.size;
+
+  return error;
+}
+
+/* compress using the custom zlib function when one is set, else with lodepng_zlib_compress */
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings)
+{
+  if(!settings->custom_zlib)
+  {
+    return lodepng_zlib_compress(out, outsize, in, insize, settings);
+  }
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#else /*no LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* build without LODEPNG_COMPILE_ZLIB: decompression is only possible through settings->custom_zlib */
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings)
+{
+  if(settings->custom_zlib)
+  {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+/* build without LODEPNG_COMPILE_ZLIB: compression is only possible through settings->custom_zlib */
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings)
+{
+  if(settings->custom_zlib)
+  {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  }
+  return 87; /*no custom zlib function provided */
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*default for the windowsize compress setting, used by lodepng_compress_settings_init and
+lodepng_default_compress_settings; this is a good tradeoff between speed and compression ratio*/
+#define DEFAULT_WINDOWSIZE 2048
+
+/*Fill 'settings' with the default compression parameters*/
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings)
+{
+  /*no custom compression functions or context by default*/
+  settings->custom_context = 0;
+  settings->custom_deflate = 0;
+  settings->custom_zlib = 0;
+
+  /*LZ77 enabled, with its matching parameters*/
+  settings->lazymatching = 1;
+  settings->nicematch = 128;
+  settings->minmatch = 3;
+  settings->windowsize = DEFAULT_WINDOWSIZE;
+  settings->use_lz77 = 1;
+
+  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
+  settings->btype = 2;
+}
+
+/*Default compression settings as a constant. NOTE(review): the initializer is positional; its
+values appear to follow the order btype, use_lz77, windowsize, minmatch, nicematch, lazymatching,
+custom_zlib, custom_deflate, custom_context - confirm against the struct declaration.*/
+const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
+
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*Fill 'settings' with the default decompression parameters*/
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings)
+{
+  /*no custom decompression functions or context by default*/
+  settings->custom_context = 0;
+  settings->custom_inflate = 0;
+  settings->custom_zlib = 0;
+
+  /*the adler32 checksum is verified by default*/
+  settings->ignore_adler32 = 0;
+}
+
+/*Default decompression settings as a constant: every field is 0, which matches what
+lodepng_decompress_settings_init assigns.*/
+const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0};
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of Zlib related code. Begin of PNG related code.                 // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / CRC32                                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+
+#ifndef LODEPNG_NO_COMPILE_CRC
+/* CRC polynomial: 0xedb88320 */
+/* Lookup table with one entry per byte value. lodepng_crc32 indexes it with the low 8 bits
+of (running crc XOR input byte). */
+static unsigned lodepng_crc32_table[256] = {
+           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
+   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
+   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
+   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
+   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
+   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
+   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
+   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
+  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
+  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
+  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
+  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
+  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
+  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
+  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
+  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
+  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
+  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
+  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
+  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
+  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
+  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
+  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
+  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
+  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
+  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
+  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
+  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
+  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
+  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
+  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
+  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
+};
+
+/*Return the CRC of the bytes data[0..length-1], computed one byte at a time through
+lodepng_crc32_table. The running value starts as all ones and is inverted again at the end.*/
+unsigned lodepng_crc32(const unsigned char* data, size_t length)
+{
+  unsigned crc = 0xffffffffu;
+  size_t remaining = length;
+  while(remaining != 0)
+  {
+    unsigned index = (crc ^ *data) & 0xff;
+    crc = (crc >> 8) ^ lodepng_crc32_table[index];
+    ++data;
+    --remaining;
+  }
+  return crc ^ 0xffffffffu;
+}
+#else /* !LODEPNG_NO_COMPILE_CRC */
+unsigned lodepng_crc32(const unsigned char* data, size_t length);
+#endif /* !LODEPNG_NO_COMPILE_CRC */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Reading and writing single bits and bytes from/to stream for LodePNG   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Read one bit at bit position *bitpointer, where position 0 is the most significant bit of
+bitstream[0], and advance *bitpointer by one. Returns the bit as 0 or 1.*/
+static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream)
+{
+  size_t pos = *bitpointer;
+  unsigned shift = 7 - (unsigned)(pos & 0x7);
+  unsigned char bit = (unsigned char)((bitstream[pos >> 3] >> shift) & 1);
+  *bitpointer = pos + 1;
+  return bit;
+}
+
+/*Read nbits bits starting at *bitpointer, the first bit read becoming the most significant
+bit of the result, and advance *bitpointer by nbits. Returns 0 when nbits is 0.*/
+static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits)
+{
+  unsigned value = 0;
+  size_t remaining = nbits;
+  while(remaining > 0)
+  {
+    unsigned bit = readBitFromReversedStream(bitpointer, bitstream);
+    --remaining;
+    value += bit << remaining;
+  }
+  return value;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Write one bit at bit position *bitpointer (position 0 is the most significant bit of
+bitstream[0]) and advance *bitpointer by one. Only ever sets bits, never clears them:
+the current bit in bitstream must be 0 for this to work.*/
+static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
+{
+  if(bit != 0)
+  {
+    size_t index = (*bitpointer) >> 3;
+    size_t shift = 7 - ((*bitpointer) & 0x7);
+    /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/
+    bitstream[index] |= (bit << shift);
+  }
+  ++(*bitpointer);
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/*Write one bit at bit position *bitpointer (position 0 is the most significant bit of
+bitstream[0]) and advance *bitpointer by one. The bit is cleared when 'bit' is 0 and set
+otherwise: the current bit in bitstream may be 0 or 1 for this to work.*/
+static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit)
+{
+  size_t index = (*bitpointer) >> 3;
+  unsigned char mask = (unsigned char)(1 << (7 - ((*bitpointer) & 0x7)));
+  if(bit) bitstream[index] |= mask;
+  else bitstream[index] &= (unsigned char)(~mask);
+  ++(*bitpointer);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG chunks                                                             / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Return the 32-bit integer stored in the first 4 bytes of the chunk, as read by lodepng_read32bitInt*/
+unsigned lodepng_chunk_length(const unsigned char* chunk)
+{
+  unsigned length = lodepng_read32bitInt(chunk);
+  return length;
+}
+
+/*Copy the 4 type characters of the chunk (bytes 4..7) into 'type' and null-terminate it*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk)
+{
+  const unsigned char* name = chunk + 4;
+  type[0] = (char)name[0];
+  type[1] = (char)name[1];
+  type[2] = (char)name[2];
+  type[3] = (char)name[3];
+  type[4] = 0; /*null termination char*/
+}
+
+/*Return 1 when 'type' is exactly 4 characters long and equal to the chunk's type
+(bytes 4..7 of the chunk), else 0*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type)
+{
+  unsigned i;
+  if(strlen(type) != 4) return 0;
+  for(i = 0; i != 4; ++i)
+  {
+    if(chunk[4 + i] != type[i]) return 0;
+  }
+  return 1;
+}
+
+/*Return 1 when bit 5 (value 32) of the first type character (chunk[4]) is set, else 0*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk)
+{
+  return (unsigned char)((chunk[4] >> 5) & 1);
+}
+
+/*Return 1 when bit 5 (value 32) of the third type character (chunk[6]) is set, else 0*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk)
+{
+  return (unsigned char)((chunk[6] >> 5) & 1);
+}
+
+/*Return 1 when bit 5 (value 32) of the fourth type character (chunk[7]) is set, else 0*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk)
+{
+  return (unsigned char)((chunk[7] >> 5) & 1);
+}
+
+/*Return a pointer to the chunk's data, which starts 8 bytes into the chunk
+(after the 4 length bytes and the 4 type bytes)*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk)
+{
+  return chunk + 8;
+}
+
+/*Const variant: return a pointer to the chunk's data, which starts 8 bytes into the chunk
+(after the 4 length bytes and the 4 type bytes)*/
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk)
+{
+  return chunk + 8;
+}
+
+/*Compare the CRC stored after the chunk's data with the CRC computed over the chunk.
+Returns 0 when they match, 1 when they differ.*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk)
+{
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned stored = lodepng_read32bitInt(&chunk[length + 8]);
+  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
+  unsigned computed = lodepng_crc32(&chunk[4], length + 4);
+  return (stored != computed) ? 1 : 0;
+}
+
+/*Compute the CRC over the chunk's 4 type letters and its data (not the length field) and
+store it in the 4 bytes that follow the data*/
+void lodepng_chunk_generate_crc(unsigned char* chunk)
+{
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned char* crc_field = chunk + 8 + length;
+  lodepng_set32bitInt(crc_field, lodepng_crc32(&chunk[4], length + 4));
+}
+
+/*Return a pointer to the chunk that follows: the current chunk's data length plus 12 bytes
+(4 length, 4 type, 4 CRC) further on. No bounds are checked here; the caller must make sure
+the result stays inside its buffer.*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk)
+{
+  /*add in size_t: in unsigned arithmetic a length close to the 32-bit maximum would wrap and
+  yield a pointer at or just after the same chunk*/
+  size_t total_chunk_length = (size_t)lodepng_chunk_length(chunk) + 12;
+  return &chunk[total_chunk_length];
+}
+
+/*Const variant: return a pointer to the chunk that follows, i.e. the current chunk's data
+length plus 12 bytes (4 length, 4 type, 4 CRC) further on. No bounds are checked here; the
+caller must make sure the result stays inside its buffer.*/
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk)
+{
+  /*add in size_t: in unsigned arithmetic a length close to the 32-bit maximum would wrap and
+  yield a pointer at or just after the same chunk*/
+  size_t total_chunk_length = (size_t)lodepng_chunk_length(chunk) + 12;
+  return &chunk[total_chunk_length];
+}
+
+/*
+Append a complete chunk (length, type, data and CRC, taken as is from 'chunk') to the buffer
+described by *out and *outlength. The buffer is grown with lodepng_realloc, so *out may change.
+Returns 0 on success, 77 when the new size overflows, 83 when the allocation fails.
+On error *out and *outlength are left untouched.
+*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk)
+{
+  size_t i;
+  /*computed in size_t: in unsigned arithmetic length + 12 could wrap before the checks below*/
+  size_t total_chunk_length = (size_t)lodepng_chunk_length(chunk) + 12;
+  unsigned char *chunk_start, *new_buffer;
+  size_t new_length;
+
+  if(total_chunk_length < 12) return 77; /*integer overflow happened (size_t no wider than unsigned)*/
+  new_length = (*outlength) + total_chunk_length;
+  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
+
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  chunk_start = &(*out)[new_length - total_chunk_length];
+
+  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
+
+  return 0;
+}
+
+/*
+Build a new chunk at the end of the buffer described by *out and *outlength.
+length: number of data bytes; type: the 4 type letters; data: the data bytes.
+The buffer is grown with lodepng_realloc by length + 12 bytes, then the length field, the
+type, the data and the CRC are written.
+Returns 0 on success, 77 when the new size overflows, 83 when the allocation fails.
+On error *out and *outlength are left untouched.
+*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data)
+{
+  size_t i;
+  unsigned char *chunk, *new_buffer;
+  /*computed in size_t: comparing against a wrapped unsigned length + 12 would let an
+  overflow go unnoticed*/
+  size_t total_chunk_length = (size_t)length + 12;
+  size_t new_length;
+
+  if(total_chunk_length < 12) return 77; /*integer overflow happened (size_t no wider than unsigned)*/
+  new_length = (*outlength) + total_chunk_length;
+  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  chunk = &(*out)[new_length - total_chunk_length];
+
+  /*1: length*/
+  lodepng_set32bitInt(chunk, (unsigned)length);
+
+  /*2: chunk name (4 letters)*/
+  chunk[4] = (unsigned char)type[0];
+  chunk[5] = (unsigned char)type[1];
+  chunk[6] = (unsigned char)type[2];
+  chunk[7] = (unsigned char)type[3];
+
+  /*3: the data*/
+  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
+
+  /*4: CRC (of the chunkname characters and the data)*/
+  lodepng_chunk_generate_crc(chunk);
+
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Color types and such                                                   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Checks whether bit depth bd is accepted for the given color type.
+Returns a LodePNG error code: 0 if the combination is allowed, 37 if the bit
+depth is not allowed for this color type, 31 if the color type is not known.*/
+static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/
+{
+  const int below_8 = (bd == 1 || bd == 2 || bd == 4);
+  const int is_8 = (bd == 8);
+  const int is_16 = (bd == 16);
+  int allowed;
+
+  switch(colortype)
+  {
+    case 0: allowed = below_8 || is_8 || is_16; break; /*grey*/
+    case 3: allowed = below_8 || is_8; break; /*palette*/
+    case 2: /*RGB*/
+    case 4: /*grey + alpha*/
+    case 6: allowed = is_8 || is_16; break; /*RGBA*/
+    default: return 31;
+  }
+  return allowed ? 0 : 37;
+}
+
+/*Returns the number of channels a pixel of the given color type has, or 0 if
+the color type does not exist.*/
+static unsigned getNumColorChannels(LodePNGColorType colortype)
+{
+  unsigned channels = 0; /*stays 0 for an unexisting color type*/
+  switch(colortype)
+  {
+    case 0: /*grey*/
+    case 3: /*palette*/
+      channels = 1;
+      break;
+    case 4: channels = 2; break; /*grey + alpha*/
+    case 2: channels = 3; break; /*RGB*/
+    case 6: channels = 4; break; /*RGBA*/
+    default: break;
+  }
+  return channels;
+}
+
+/*Returns the bits per pixel of a color type / bit depth combination: the
+amount of channels multiplied by the bits per channel.*/
+static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth)
+{
+  const unsigned channels = getNumColorChannels(colortype);
+  return channels * bitdepth;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Puts a color mode in its default state: 8-bit RGBA, no color key, no palette.*/
+void lodepng_color_mode_init(LodePNGColorMode* info)
+{
+  /*pixel format*/
+  info->colortype = LCT_RGBA;
+  info->bitdepth = 8;
+  /*empty palette*/
+  info->palette = 0;
+  info->palettesize = 0;
+  /*no color key*/
+  info->key_defined = 0;
+  info->key_r = 0;
+  info->key_g = 0;
+  info->key_b = 0;
+}
+
+/*Releases what a color mode owns, which is done by clearing its palette.*/
+void lodepng_color_mode_cleanup(LodePNGColorMode* info)
+{
+  lodepng_palette_clear(info);
+}
+
+/*
+Makes dest a deep copy of source. The palette dest held before is released
+first, so dest must be an initialized color mode.
+Returns 0 on success or 83 if the palette could not be allocated. On failure
+dest is left with an empty palette (null pointer, size 0).
+*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source)
+{
+  size_t i;
+  lodepng_color_mode_cleanup(dest);
+  *dest = *source;
+  if(source->palette)
+  {
+    /*room for 256 colors with 4 bytes each*/
+    dest->palette = (unsigned char*)lodepng_malloc(1024);
+    if(!dest->palette)
+    {
+      /*keep pointer and size consistent: a nonzero palettesize next to a null
+      palette would make later palette reads dereference the null pointer*/
+      dest->palettesize = 0;
+      return source->palettesize ? 83 : 0; /*alloc fail*/
+    }
+    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
+  }
+  return 0;
+}
+
+/*Returns 1 if both color modes are the same, 0 otherwise. Compared are the
+color type, the bit depth, the color key (its r, g and b values only when a key
+is defined) and the palette (same size and the same 4 bytes for every entry).
+The palettes are always compared: an empty palette only equals an empty one.*/
+static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b)
+{
+  size_t k;
+
+  if(a->colortype != b->colortype || a->bitdepth != b->bitdepth) return 0;
+  if(a->key_defined != b->key_defined) return 0;
+  if(a->key_defined && (a->key_r != b->key_r || a->key_g != b->key_g || a->key_b != b->key_b)) return 0;
+  if(a->palettesize != b->palettesize) return 0;
+
+  for(k = 0; k != a->palettesize * 4; ++k)
+  {
+    if(a->palette[k] != b->palette[k]) return 0;
+  }
+  return 1;
+}
+
+/*Frees the palette, if there is one, and marks the color mode as having no
+palette entries.*/
+void lodepng_palette_clear(LodePNGColorMode* info)
+{
+  if(info->palette != 0)
+  {
+    lodepng_free(info->palette);
+    info->palette = 0;
+  }
+  info->palettesize = 0;
+}
+
+/*
+Appends one RGBA color to the palette of info. The palette buffer is allocated
+on first use with room for 256 entries of 4 bytes each and is never grown.
+Returns 0 on success, 83 if the allocation failed, or 38 if the palette already
+holds 256 entries (presumably LodePNG's "palette too big" code; confirm against
+lodepng_error_text).
+*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
+{
+  unsigned char* data;
+  /*the buffer has a fixed size of 1024 bytes: a 257th entry would be written
+  past its end*/
+  if(info->palettesize >= 256) return 38; /*palette too big*/
+  if(!info->palette) /*allocate palette if empty*/
+  {
+    /*room for 256 colors with 4 bytes each*/
+    data = (unsigned char*)lodepng_realloc(info->palette, 1024);
+    if(!data) return 83; /*alloc fail*/
+    else info->palette = data;
+  }
+  info->palette[4 * info->palettesize + 0] = r;
+  info->palette[4 * info->palettesize + 1] = g;
+  info->palette[4 * info->palettesize + 2] = b;
+  info->palette[4 * info->palettesize + 3] = a;
+  ++info->palettesize;
+  return 0;
+}
+
+/*Returns the bits per pixel of a color mode, derived from its color type and
+bit depth.*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info)
+{
+  const unsigned bits_per_pixel = lodepng_get_bpp_lct(info->colortype, info->bitdepth);
+  return bits_per_pixel;
+}
+
+/*Returns the number of channels of the color type of this color mode.*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info)
+{
+  const unsigned channels = getNumColorChannels(info->colortype);
+  return channels;
+}
+
+/*Returns 1 if the color type is grey or grey with alpha, 0 otherwise.*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info)
+{
+  if(info->colortype == LCT_GREY) return 1;
+  if(info->colortype == LCT_GREY_ALPHA) return 1;
+  return 0;
+}
+
+/*Returns 1 if bit 2 (value 4) of the color type is set, which is the case for
+color types 4 and 6, and 0 otherwise.*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info)
+{
+  return (info->colortype & 4) ? 1 : 0;
+}
+
+/*Returns 1 if the color type is palette, 0 otherwise.*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info)
+{
+  return (info->colortype == LCT_PALETTE) ? 1 : 0;
+}
+
+/*Returns 1 if at least one palette entry has an alpha byte below 255, and 0
+otherwise (also when the palette is empty).*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info)
+{
+  size_t entry = 0;
+  while(entry != info->palettesize)
+  {
+    /*the alpha byte is the 4th byte of each entry*/
+    if(info->palette[entry * 4 + 3] < 255) return 1;
+    ++entry;
+  }
+  return 0;
+}
+
+/*Returns 1 if the color mode has a color key, an alpha color type or a
+palette entry with alpha below 255, and 0 otherwise.*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info)
+{
+  if(info->key_defined) return 1;
+  if(lodepng_is_alpha_type(info)) return 1;
+  return lodepng_has_palette_alpha(info) ? 1 : 0;
+}
+
+/*
+Returns the size in bytes of a raw image of w * h pixels in the given color
+mode. The total amount of bits is rounded up to a whole number of bytes.
+With a 32-bit size_t the result still overflows once w * h * bpp / 8 no longer
+fits, roughly from w * h >= 268435455 on.
+*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color)
+{
+  size_t bpp = lodepng_get_bpp(color);
+  /*widen before multiplying: as unsigned * unsigned the product wraps around
+  before it is stored in the wider size_t*/
+  size_t n = (size_t)w * (size_t)h;
+  /*whole groups of 8 pixels take exactly bpp bytes; the rest is rounded up*/
+  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
+}
+
+/*
+Returns the size in bytes of a raw image of w * h pixels with the given color
+type and bit depth. The total amount of bits is rounded up to a whole number
+of bytes.
+With a 32-bit size_t the result still overflows once w * h * bpp / 8 no longer
+fits, roughly from w * h >= 268435455 on.
+*/
+size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
+{
+  size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
+  /*widen before multiplying: as unsigned * unsigned the product wraps around
+  before it is stored in the wider size_t*/
+  size_t n = (size_t)w * (size_t)h;
+  /*whole groups of 8 pixels take exactly bpp bytes; the rest is rounded up*/
+  return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8;
+}
+
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Returns the size in bytes of h scanlines of w pixels when every scanline is
+rounded up to a whole number of bytes. In an idat chunk each scanline is a
+multiple of 8 bits, unlike the lodepng output buffer.
+With a 32-bit size_t the result still overflows for very large images.
+*/
+static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color)
+{
+  size_t bpp = lodepng_get_bpp(color);
+  /*do the multiplications in size_t: as unsigned, (w / 8) * bpp wraps around
+  for wide images before it is stored in the wider size_t*/
+  size_t line = ((size_t)(w / 8) * bpp) + ((w & 7) * bpp + 7) / 8;
+  return (size_t)h * line;
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*Marks all 3 unknown chunk buffers of info as empty: null pointer, size 0.*/
+static void LodePNGUnknownChunks_init(LodePNGInfo* info)
+{
+  unsigned slot;
+  for(slot = 0; slot < 3; ++slot)
+  {
+    info->unknown_chunks_data[slot] = 0;
+    info->unknown_chunks_size[slot] = 0;
+  }
+}
+
+/*Frees the 3 unknown chunk buffers of info. The pointers and sizes themselves
+are not reset.*/
+static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info)
+{
+  unsigned slot = 0;
+  while(slot != 3)
+  {
+    lodepng_free(info->unknown_chunks_data[slot]);
+    ++slot;
+  }
+}
+
+/*
+Replaces the 3 unknown chunk buffers of dest with copies of those of src. The
+buffers dest held before are freed first.
+Returns 0 on success or 83 if an allocation failed. On failure the buffers
+copied so far are kept and the remaining ones are empty (null pointer, size 0),
+so dest can still be cleaned up safely.
+*/
+static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src)
+{
+  unsigned i;
+
+  LodePNGUnknownChunks_cleanup(dest);
+  /*forget the freed buffers right away, so that an early return below never
+  leaves dangling pointers in dest*/
+  for(i = 0; i != 3; ++i)
+  {
+    dest->unknown_chunks_data[i] = 0;
+    dest->unknown_chunks_size[i] = 0;
+  }
+
+  for(i = 0; i != 3; ++i)
+  {
+    size_t j;
+    size_t size = src->unknown_chunks_size[i];
+    unsigned char* copy = (unsigned char*)lodepng_malloc(size);
+    if(!copy && size) return 83; /*alloc fail*/
+    for(j = 0; j < size; ++j)
+    {
+      copy[j] = src->unknown_chunks_data[i][j];
+    }
+    /*only publish pointer and size together, once the copy is complete*/
+    dest->unknown_chunks_data[i] = copy;
+    dest->unknown_chunks_size[i] = size;
+  }
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Marks the text arrays of info as empty: no keys, no strings, count 0.*/
+static void LodePNGText_init(LodePNGInfo* info)
+{
+  info->text_keys = NULL;
+  info->text_strings = NULL;
+  info->text_num = 0;
+}
+
+/*Cleans up every stored text key and string with string_cleanup and frees the
+two arrays holding them. The pointers and the count are not reset.*/
+static void LodePNGText_cleanup(LodePNGInfo* info)
+{
+  size_t k = 0;
+  while(k != info->text_num)
+  {
+    string_cleanup(&info->text_keys[k]);
+    string_cleanup(&info->text_strings[k]);
+    ++k;
+  }
+  lodepng_free(info->text_keys);
+  lodepng_free(info->text_strings);
+}
+
+/*Fills dest with copies of all text entries of source, added one by one with
+lodepng_add_text. dest is first set to empty; whatever its text pointers held
+before is not freed here. Returns 0 on success, otherwise the error code of
+the lodepng_add_text call that failed.*/
+static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
+{
+  size_t k;
+  dest->text_num = 0;
+  dest->text_keys = 0;
+  dest->text_strings = 0;
+  for(k = 0; k != source->text_num; ++k)
+  {
+    CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[k], source->text_strings[k]));
+  }
+  return 0;
+}
+
+/*Removes all text entries of info and leaves it in the empty state, so that
+new entries can be added or info can be cleaned up afterwards.*/
+void lodepng_clear_text(LodePNGInfo* info)
+{
+  LodePNGText_cleanup(info);
+  /*the cleanup only frees: reset the count and the pointers as well, otherwise a
+  later lodepng_add_text would realloc, and a later cleanup would free, memory
+  that was already released*/
+  LodePNGText_init(info);
+}
+
+/*
+Appends one text entry, a key and its string, to info. Both arrays are grown by
+one slot with lodepng_realloc and the values are stored with string_init and
+string_set.
+Returns 0 on success or 83 if growing one of the arrays failed. In that case
+text_num is unchanged and info keeps pointing at valid arrays that still hold
+all earlier entries.
+*/
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str)
+{
+  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
+
+  /*a successful realloc may have moved the array and released the old one, so
+  adopt each result right away. Freeing it when the other realloc fails would
+  leave info pointing at released memory and lose the existing entries*/
+  if(new_keys) info->text_keys = new_keys;
+  if(new_strings) info->text_strings = new_strings;
+  if(!new_keys || !new_strings) return 83; /*alloc fail*/
+
+  ++info->text_num;
+
+  string_init(&info->text_keys[info->text_num - 1]);
+  string_set(&info->text_keys[info->text_num - 1], key);
+
+  string_init(&info->text_strings[info->text_num - 1]);
+  string_set(&info->text_strings[info->text_num - 1], str);
+
+  return 0;
+}
+
+/******************************************************************************/
+
+/*Marks the international text arrays of info as empty: all four arrays null,
+count 0.*/
+static void LodePNGIText_init(LodePNGInfo* info)
+{
+  info->itext_keys = NULL;
+  info->itext_langtags = NULL;
+  info->itext_transkeys = NULL;
+  info->itext_strings = NULL;
+  info->itext_num = 0;
+}
+
+/*Cleans up every stored key, language tag, translated key and string with
+string_cleanup and frees the four arrays holding them. The pointers and the
+count are not reset.*/
+static void LodePNGIText_cleanup(LodePNGInfo* info)
+{
+  size_t k = 0;
+  while(k != info->itext_num)
+  {
+    string_cleanup(&info->itext_keys[k]);
+    string_cleanup(&info->itext_langtags[k]);
+    string_cleanup(&info->itext_transkeys[k]);
+    string_cleanup(&info->itext_strings[k]);
+    ++k;
+  }
+  lodepng_free(info->itext_keys);
+  lodepng_free(info->itext_langtags);
+  lodepng_free(info->itext_transkeys);
+  lodepng_free(info->itext_strings);
+}
+
+/*Fills dest with copies of all international text entries of source, added
+one by one with lodepng_add_itext. dest is first set to empty; whatever its
+itext pointers held before is not freed here. Returns 0 on success, otherwise
+the error code of the lodepng_add_itext call that failed.*/
+static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source)
+{
+  size_t k;
+  dest->itext_num = 0;
+  dest->itext_keys = 0;
+  dest->itext_langtags = 0;
+  dest->itext_transkeys = 0;
+  dest->itext_strings = 0;
+  for(k = 0; k != source->itext_num; ++k)
+  {
+    CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[k], source->itext_langtags[k],
+                                        source->itext_transkeys[k], source->itext_strings[k]));
+  }
+  return 0;
+}
+
+/*Removes all international text entries of info and leaves it in the empty
+state, so that new entries can be added or info can be cleaned up afterwards.*/
+void lodepng_clear_itext(LodePNGInfo* info)
+{
+  LodePNGIText_cleanup(info);
+  /*the cleanup only frees: reset the count and the pointers as well, otherwise a
+  later lodepng_add_itext would realloc, and a later cleanup would free, memory
+  that was already released*/
+  LodePNGIText_init(info);
+}
+
+/*
+Appends one international text entry (key, language tag, translated key and
+string) to info. The four arrays are grown by one slot with lodepng_realloc
+and the values are stored with string_init and string_set.
+Returns 0 on success or 83 if growing one of the arrays failed. In that case
+itext_num is unchanged and info keeps pointing at valid arrays that still hold
+all earlier entries.
+*/
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str)
+{
+  char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
+  char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
+  char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
+
+  /*a successful realloc may have moved the array and released the old one, so
+  adopt each result right away. Freeing it when another realloc fails would
+  leave info pointing at released memory and lose the existing entries*/
+  if(new_keys) info->itext_keys = new_keys;
+  if(new_langtags) info->itext_langtags = new_langtags;
+  if(new_transkeys) info->itext_transkeys = new_transkeys;
+  if(new_strings) info->itext_strings = new_strings;
+  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/
+
+  ++info->itext_num;
+
+  string_init(&info->itext_keys[info->itext_num - 1]);
+  string_set(&info->itext_keys[info->itext_num - 1], key);
+
+  string_init(&info->itext_langtags[info->itext_num - 1]);
+  string_set(&info->itext_langtags[info->itext_num - 1], langtag);
+
+  string_init(&info->itext_transkeys[info->itext_num - 1]);
+  string_set(&info->itext_transkeys[info->itext_num - 1], transkey);
+
+  string_init(&info->itext_strings[info->itext_num - 1]);
+  string_set(&info->itext_strings[info->itext_num - 1], str);
+
+  return 0;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Puts a LodePNGInfo in its default state: default color mode, all three
+method fields 0 and, when ancillary chunks are compiled in, no background
+color, no texts, no time, no physical dimensions and no unknown chunks.*/
+void lodepng_info_init(LodePNGInfo* info)
+{
+  lodepng_color_mode_init(&info->color);
+  info->compression_method = 0;
+  info->filter_method = 0;
+  info->interlace_method = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*no background color*/
+  info->background_defined = 0;
+  info->background_r = 0;
+  info->background_g = 0;
+  info->background_b = 0;
+
+  /*no time and no physical dimensions*/
+  info->time_defined = 0;
+  info->phys_defined = 0;
+
+  /*empty text, international text and unknown chunk storage*/
+  LodePNGText_init(info);
+  LodePNGIText_init(info);
+  LodePNGUnknownChunks_init(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+/*Releases everything a LodePNGInfo owns: the color mode and, when ancillary
+chunks are compiled in, the texts, international texts and unknown chunks.*/
+void lodepng_info_cleanup(LodePNGInfo* info)
+{
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGUnknownChunks_cleanup(info);
+  LodePNGIText_cleanup(info);
+  LodePNGText_cleanup(info);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  lodepng_color_mode_cleanup(&info->color);
+}
+
+/*
+Makes dest a deep copy of source. What dest owned before is released first, so
+dest must be an initialized LodePNGInfo.
+Returns 0 on success, otherwise the error code of the copy step that failed.
+On failure dest only refers to memory of its own, so it can still be passed to
+lodepng_info_cleanup.
+*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source)
+{
+  lodepng_info_cleanup(dest);
+  *dest = *source;
+  /*the struct assignment also copied the pointers of source. Reset every owning
+  member before the first step that can fail: otherwise an early return leaves
+  dest sharing buffers with source, which are then freed twice*/
+  lodepng_color_mode_init(&dest->color);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  LodePNGText_init(dest);
+  LodePNGIText_init(dest);
+  LodePNGUnknownChunks_init(dest);
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  CERROR_TRY_RETURN(LodePNGText_copy(dest, source));
+  CERROR_TRY_RETURN(LodePNGIText_copy(dest, source));
+  CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source));
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  return 0;
+}
+
+/*Exchanges the contents of two LodePNGInfo structs by plain struct
+assignment; no memory is allocated or freed.*/
+void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b)
+{
+  LodePNGInfo saved_b = *b;
+  *b = *a;
+  *a = saved_b;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Writes one bitgroup into a packed octet array, the first bitgroup of a byte
+going into its most significant bits.
+out: octet array to add the bits to
+index: bitgroup index
+bits: bitgroup size (1, 2 or 4)
+in: bitgroup value; only its lowest "bits" bits are used
+The first bitgroup of a byte overwrites that byte, the following ones are
+OR-ed into it.*/
+static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in)
+{
+  unsigned last_slot; /*8 / bits - 1*/
+  unsigned slot; /*position of the bitgroup inside its byte, 0 = leftmost*/
+  const size_t byte_pos = index * bits / 8;
+
+  if(bits == 1) last_slot = 7;
+  else if(bits == 2) last_slot = 3;
+  else last_slot = 1;
+  slot = index & last_slot;
+
+  /*filter out any other bits of the input value, then move it into place*/
+  in = (in & ((1u << bits) - 1u)) << (bits * (last_slot - slot));
+
+  if(slot == 0) out[byte_pos] = in;
+  else out[byte_pos] |= in;
+}
+
+typedef struct ColorTree ColorTree;
+
+/*
+One node of a color tree
+This is the data structure used to count the number of unique colors and to get a palette
+index for a color. It's like an octree, but because the alpha channel is used too, each
+node has 16 instead of 8 children.
+The tree is 8 levels deep, one level per bit of the 8-bit channels: at each level the
+child number is formed from one bit of r, g, b and a (r is worth 8, g 4, b 2, a 1),
+starting with the least significant bit of each channel.
+*/
+struct ColorTree
+{
+  ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level, 0 where there is no child*/
+  int index; /*the payload. Only has a meaningful value if this is in the last level, -1 if not set*/
+};
+
+/*Turns tree into an empty node: no children and no index (-1).*/
+static void color_tree_init(ColorTree* tree)
+{
+  ColorTree** child = tree->children;
+  ColorTree** const end = tree->children + 16;
+  for(; child != end; ++child) *child = 0;
+  tree->index = -1;
+}
+
+/*Recursively frees all descendants of tree. The node tree itself is not freed
+and its child pointers are not reset.*/
+static void color_tree_cleanup(ColorTree* tree)
+{
+  int slot;
+  for(slot = 0; slot != 16; ++slot)
+  {
+    ColorTree* child = tree->children[slot];
+    if(!child) continue;
+    color_tree_cleanup(child);
+    lodepng_free(child);
+  }
+}
+
+/*Looks up an RGBA color in the tree by walking down 8 levels, one per bit of
+the channels, starting at the least significant bit.
+Returns the index stored for the color, or -1 if the color is not present.*/
+static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
+{
+  int level;
+  for(level = 0; level != 8; ++level)
+  {
+    /*child number: the current bit of r, g, b and a, with r as the highest bit*/
+    int slot = (((r >> level) & 1) << 3) | (((g >> level) & 1) << 2)
+             | (((b >> level) & 1) << 1) | ((a >> level) & 1);
+    ColorTree* next = tree->children[slot];
+    if(!next) return -1;
+    tree = next;
+  }
+  return tree->index;
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Returns 1 if the color has an index of 0 or higher in the tree, 0 otherwise.*/
+static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
+{
+  const int index = color_tree_get(tree, r, g, b, a);
+  return (index < 0) ? 0 : 1;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*
+Stores index for an RGBA color in the tree, creating the nodes on the path to
+it where they do not exist yet.
+The color is not allowed to already exist.
+index should fit in an int, because it is stored as int: -1 is used for
+"doesn't exist".
+Returns 0 on success or 83 if a node could not be allocated. In that case the
+nodes created so far stay in the tree, where color_tree_cleanup frees them,
+and no index is stored for the color. Callers that use this as a plain
+statement keep compiling.
+*/
+static unsigned color_tree_add(ColorTree* tree,
+                               unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index)
+{
+  int bit;
+  for(bit = 0; bit < 8; ++bit)
+  {
+    int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
+    if(!tree->children[i])
+    {
+      ColorTree* child = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
+      /*check before use: initializing a failed allocation would write through a null pointer*/
+      if(!child) return 83; /*alloc fail*/
+      color_tree_init(child);
+      tree->children[i] = child;
+    }
+    tree = tree->children[i];
+  }
+  tree->index = (int)index;
+  return 0;
+}
+
+/*
+Put a pixel, given its RGBA color with 8 bits per channel, into an image of any color type.
+out: the output image buffer
+i: index of the pixel in the image
+mode: color mode of the output image
+tree: color tree to look up the palette index in, only used if mode is palette
+r, g, b, a: the color of the pixel
+For the grey color types only r is used as the grey value. For 16-bit output each
+8-bit value is written to both bytes of the channel.
+Returns 0 on success, or 82 if mode is palette and the color is not in the tree.
+*/
+static unsigned rgba8ToPixel(unsigned char* out, size_t i,
+                             const LodePNGColorMode* mode, ColorTree* tree /*for palette*/,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a)
+{
+  if(mode->colortype == LCT_GREY)
+  {
+    unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/;
+    if(mode->bitdepth == 8) out[i] = grey;
+    else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = grey;
+    else
+    {
+      /*take the most significant bits of grey*/
+      grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1);
+      addColorBits(out, i, mode->bitdepth, grey);
+    }
+  }
+  else if(mode->colortype == LCT_RGB)
+  {
+    if(mode->bitdepth == 8)
+    {
+      out[i * 3 + 0] = r;
+      out[i * 3 + 1] = g;
+      out[i * 3 + 2] = b;
+    }
+    else
+    {
+      /*any other bit depth is written as 2 bytes per channel*/
+      out[i * 6 + 0] = out[i * 6 + 1] = r;
+      out[i * 6 + 2] = out[i * 6 + 3] = g;
+      out[i * 6 + 4] = out[i * 6 + 5] = b;
+    }
+  }
+  else if(mode->colortype == LCT_PALETTE)
+  {
+    int index = color_tree_get(tree, r, g, b, a);
+    if(index < 0) return 82; /*color not in palette*/
+    if(mode->bitdepth == 8) out[i] = index;
+    else addColorBits(out, i, mode->bitdepth, (unsigned)index);
+  }
+  else if(mode->colortype == LCT_GREY_ALPHA)
+  {
+    unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/;
+    if(mode->bitdepth == 8)
+    {
+      out[i * 2 + 0] = grey;
+      out[i * 2 + 1] = a;
+    }
+    else if(mode->bitdepth == 16)
+    {
+      out[i * 4 + 0] = out[i * 4 + 1] = grey;
+      out[i * 4 + 2] = out[i * 4 + 3] = a;
+    }
+    /*for any other bit depth nothing is written*/
+  }
+  else if(mode->colortype == LCT_RGBA)
+  {
+    if(mode->bitdepth == 8)
+    {
+      out[i * 4 + 0] = r;
+      out[i * 4 + 1] = g;
+      out[i * 4 + 2] = b;
+      out[i * 4 + 3] = a;
+    }
+    else
+    {
+      /*any other bit depth is written as 2 bytes per channel*/
+      out[i * 8 + 0] = out[i * 8 + 1] = r;
+      out[i * 8 + 2] = out[i * 8 + 3] = g;
+      out[i * 8 + 4] = out[i * 8 + 5] = b;
+      out[i * 8 + 6] = out[i * 8 + 7] = a;
+    }
+  }
+
+  return 0; /*no error*/
+}
+
+/*
+Put a pixel, given its RGBA16 color, into an image of any color type with 16-bit depth.
+out: the output image buffer
+i: index of the pixel in the image
+mode: color mode of the output image; its bit depth is not checked here
+r, g, b, a: the color of the pixel, 16 bits per channel
+Every channel is written as 2 bytes, most significant byte first. For the grey
+color types only r is used as the grey value. For any other color type, such as
+palette, nothing is written.
+*/
+static void rgba16ToPixel(unsigned char* out, size_t i,
+                         const LodePNGColorMode* mode,
+                         unsigned short r, unsigned short g, unsigned short b, unsigned short a)
+{
+  if(mode->colortype == LCT_GREY)
+  {
+    unsigned short grey = r; /*((unsigned)r + g + b) / 3*/;
+    out[i * 2 + 0] = (grey >> 8) & 255;
+    out[i * 2 + 1] = grey & 255;
+  }
+  else if(mode->colortype == LCT_RGB)
+  {
+    out[i * 6 + 0] = (r >> 8) & 255;
+    out[i * 6 + 1] = r & 255;
+    out[i * 6 + 2] = (g >> 8) & 255;
+    out[i * 6 + 3] = g & 255;
+    out[i * 6 + 4] = (b >> 8) & 255;
+    out[i * 6 + 5] = b & 255;
+  }
+  else if(mode->colortype == LCT_GREY_ALPHA)
+  {
+    unsigned short grey = r; /*((unsigned)r + g + b) / 3*/;
+    out[i * 4 + 0] = (grey >> 8) & 255;
+    out[i * 4 + 1] = grey & 255;
+    out[i * 4 + 2] = (a >> 8) & 255;
+    out[i * 4 + 3] = a & 255;
+  }
+  else if(mode->colortype == LCT_RGBA)
+  {
+    out[i * 8 + 0] = (r >> 8) & 255;
+    out[i * 8 + 1] = r & 255;
+    out[i * 8 + 2] = (g >> 8) & 255;
+    out[i * 8 + 3] = g & 255;
+    out[i * 8 + 4] = (b >> 8) & 255;
+    out[i * 8 + 5] = b & 255;
+    out[i * 8 + 6] = (a >> 8) & 255;
+    out[i * 8 + 7] = a & 255;
+  }
+}
+
+/*
+Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.
+r, g, b, a: receive the color, 8 bits per channel
+in: the raw image
+i: index of the pixel
+mode: color mode of the raw image
+Of 16-bit channels only the first (most significant) byte is returned. Grey values
+of less than 8 bits are scaled to the range 0-255. For color types without alpha
+channel, a is 0 if the pixel equals the color key of mode and 255 otherwise; the key
+is compared against the full value of the channel, also for 16-bit.
+*/
+static void getPixelColorRGBA8(unsigned char* r, unsigned char* g,
+                               unsigned char* b, unsigned char* a,
+                               const unsigned char* in, size_t i,
+                               const LodePNGColorMode* mode)
+{
+  if(mode->colortype == LCT_GREY)
+  {
+    if(mode->bitdepth == 8)
+    {
+      *r = *g = *b = in[i];
+      if(mode->key_defined && *r == mode->key_r) *a = 0;
+      else *a = 255;
+    }
+    else if(mode->bitdepth == 16)
+    {
+      *r = *g = *b = in[i * 2 + 0];
+      if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
+      else *a = 255;
+    }
+    else
+    {
+      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+      size_t j = i * mode->bitdepth;
+      unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+      *r = *g = *b = (value * 255) / highest;
+      if(mode->key_defined && value == mode->key_r) *a = 0;
+      else *a = 255;
+    }
+  }
+  else if(mode->colortype == LCT_RGB)
+  {
+    if(mode->bitdepth == 8)
+    {
+      *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2];
+      if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0;
+      else *a = 255;
+    }
+    else
+    {
+      /*any other bit depth is read as 2 bytes per channel*/
+      *r = in[i * 6 + 0];
+      *g = in[i * 6 + 2];
+      *b = in[i * 6 + 4];
+      if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+         && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+         && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
+      else *a = 255;
+    }
+  }
+  else if(mode->colortype == LCT_PALETTE)
+  {
+    unsigned index;
+    if(mode->bitdepth == 8) index = in[i];
+    else
+    {
+      size_t j = i * mode->bitdepth;
+      index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+    }
+
+    if(index >= mode->palettesize)
+    {
+      /*This is an error according to the PNG spec, but common PNG decoders make it black instead.
+      Done here too, slightly faster due to no error handling needed.*/
+      *r = *g = *b = 0;
+      *a = 255;
+    }
+    else
+    {
+      *r = mode->palette[index * 4 + 0];
+      *g = mode->palette[index * 4 + 1];
+      *b = mode->palette[index * 4 + 2];
+      *a = mode->palette[index * 4 + 3];
+    }
+  }
+  else if(mode->colortype == LCT_GREY_ALPHA)
+  {
+    if(mode->bitdepth == 8)
+    {
+      *r = *g = *b = in[i * 2 + 0];
+      *a = in[i * 2 + 1];
+    }
+    else
+    {
+      /*any other bit depth is read as 2 bytes per channel*/
+      *r = *g = *b = in[i * 4 + 0];
+      *a = in[i * 4 + 2];
+    }
+  }
+  else if(mode->colortype == LCT_RGBA)
+  {
+    if(mode->bitdepth == 8)
+    {
+      *r = in[i * 4 + 0];
+      *g = in[i * 4 + 1];
+      *b = in[i * 4 + 2];
+      *a = in[i * 4 + 3];
+    }
+    else
+    {
+      /*any other bit depth is read as 2 bytes per channel*/
+      *r = in[i * 8 + 0];
+      *g = in[i * 8 + 2];
+      *b = in[i * 8 + 4];
+      *a = in[i * 8 + 6];
+    }
+  }
+}
+
+/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color
+mode test cases, optimized to convert the colors much faster, when converting
+to RGBA or RGB with 8 bit per channel. buffer must be RGBA or RGB output with
+enough memory, if has_alpha is true the output is RGBA. mode has the color mode
+of the input buffer.
+numpixels is the amount of pixels to convert. The pixels are written to buffer
+one after the other, 4 bytes per pixel if has_alpha is true and 3 otherwise.*/
+static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels,
+                                unsigned has_alpha, const unsigned char* in,
+                                const LodePNGColorMode* mode)
+{
+  unsigned num_channels = has_alpha ? 4 : 3;
+  size_t i;
+  if(mode->colortype == LCT_GREY)
+  {
+    if(mode->bitdepth == 8)
+    {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = buffer[1] = buffer[2] = in[i];
+        if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255;
+      }
+    }
+    else if(mode->bitdepth == 16)
+    {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2];
+        if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255;
+      }
+    }
+    else
+    {
+      unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/
+      size_t j = 0; /*bit position in the input, advanced by readBitsFromReversedStream*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth);
+        buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest;
+        if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255;
+      }
+    }
+  }
+  else if(mode->colortype == LCT_RGB)
+  {
+    if(mode->bitdepth == 8)
+    {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = in[i * 3 + 0];
+        buffer[1] = in[i * 3 + 1];
+        buffer[2] = in[i * 3 + 2];
+        if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r
+           && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255;
+      }
+    }
+    else
+    {
+      /*any other bit depth is read as 2 bytes per channel, of which the first is used*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = in[i * 6 + 0];
+        buffer[1] = in[i * 6 + 2];
+        buffer[2] = in[i * 6 + 4];
+        if(has_alpha) buffer[3] = mode->key_defined
+           && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+           && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+           && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255;
+      }
+    }
+  }
+  else if(mode->colortype == LCT_PALETTE)
+  {
+    unsigned index;
+    size_t j = 0; /*bit position in the input, only used for bit depths other than 8*/
+    for(i = 0; i != numpixels; ++i, buffer += num_channels)
+    {
+      if(mode->bitdepth == 8) index = in[i];
+      else index = readBitsFromReversedStream(&j, in, mode->bitdepth);
+
+      if(index >= mode->palettesize)
+      {
+        /*This is an error according to the PNG spec, but most PNG decoders make it black instead.
+        Done here too, slightly faster due to no error handling needed.*/
+        buffer[0] = buffer[1] = buffer[2] = 0;
+        if(has_alpha) buffer[3] = 255;
+      }
+      else
+      {
+        buffer[0] = mode->palette[index * 4 + 0];
+        buffer[1] = mode->palette[index * 4 + 1];
+        buffer[2] = mode->palette[index * 4 + 2];
+        if(has_alpha) buffer[3] = mode->palette[index * 4 + 3];
+      }
+    }
+  }
+  else if(mode->colortype == LCT_GREY_ALPHA)
+  {
+    if(mode->bitdepth == 8)
+    {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0];
+        if(has_alpha) buffer[3] = in[i * 2 + 1];
+      }
+    }
+    else
+    {
+      /*any other bit depth is read as 2 bytes per channel, of which the first is used*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0];
+        if(has_alpha) buffer[3] = in[i * 4 + 2];
+      }
+    }
+  }
+  else if(mode->colortype == LCT_RGBA)
+  {
+    if(mode->bitdepth == 8)
+    {
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = in[i * 4 + 0];
+        buffer[1] = in[i * 4 + 1];
+        buffer[2] = in[i * 4 + 2];
+        if(has_alpha) buffer[3] = in[i * 4 + 3];
+      }
+    }
+    else
+    {
+      /*any other bit depth is read as 2 bytes per channel, of which the first is used*/
+      for(i = 0; i != numpixels; ++i, buffer += num_channels)
+      {
+        buffer[0] = in[i * 8 + 0];
+        buffer[1] = in[i * 8 + 2];
+        buffer[2] = in[i * 8 + 4];
+        if(has_alpha) buffer[3] = in[i * 8 + 6];
+      }
+    }
+  }
+}
+
+/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with
+given color type, but the given color type must be 16-bit itself.
+r, g, b, a: receive the color, 16 bits per channel
+in: the raw image, every channel stored as 2 bytes with the most significant byte first
+i: index of the pixel
+mode: color mode of the raw image; its bit depth is not checked here
+For color types without alpha channel, a is 0 if the pixel equals the color key of
+mode and 65535 otherwise. For any other color type, such as palette, the outputs are
+left untouched.*/
+static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a,
+                                const unsigned char* in, size_t i, const LodePNGColorMode* mode)
+{
+  if(mode->colortype == LCT_GREY)
+  {
+    *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1];
+    if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0;
+    else *a = 65535;
+  }
+  else if(mode->colortype == LCT_RGB)
+  {
+    *r = 256u * in[i * 6 + 0] + in[i * 6 + 1];
+    *g = 256u * in[i * 6 + 2] + in[i * 6 + 3];
+    *b = 256u * in[i * 6 + 4] + in[i * 6 + 5];
+    if(mode->key_defined
+       && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r
+       && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g
+       && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0;
+    else *a = 65535;
+  }
+  else if(mode->colortype == LCT_GREY_ALPHA)
+  {
+    *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1];
+    *a = 256u * in[i * 4 + 2] + in[i * 4 + 3];
+  }
+  else if(mode->colortype == LCT_RGBA)
+  {
+    *r = 256u * in[i * 8 + 0] + in[i * 8 + 1];
+    *g = 256u * in[i * 8 + 2] + in[i * 8 + 3];
+    *b = 256u * in[i * 8 + 4] + in[i * 8 + 5];
+    *a = 256u * in[i * 8 + 6] + in[i * 8 + 7];
+  }
+}
+
+/*
+Converts a raw image of w * h pixels from color mode mode_in to color mode mode_out.
+out: the output buffer; must be large enough for the image in mode_out
+in: the input image in mode_in
+mode_out: the color mode to convert to. If it is palette and has no palette values,
+  the palette of mode_in is used; a new palette is never created here.
+mode_in: the color mode of the input image
+w, h: dimensions of the image in pixels
+If both color modes are equal the bytes are copied unchanged.
+Returns 0 on success, or 82 if mode_out is palette and a color of the image is not
+in the palette.
+*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h)
+{
+  /*size_t instead of int: the counter is compared against size_t amounts, and an
+  int would overflow on images with more pixels or bytes than INT_MAX*/
+  size_t i;
+  ColorTree tree;
+  /*widen before multiplying: as unsigned * unsigned the product wraps around
+  before it is stored in the wider size_t*/
+  size_t numpixels = (size_t)w * (size_t)h;
+  unsigned error = 0;
+
+  if(lodepng_color_mode_equal(mode_out, mode_in))
+  {
+    size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
+    for(i = 0; i != numbytes; ++i) out[i] = in[i];
+    return 0;
+  }
+
+  if(mode_out->colortype == LCT_PALETTE)
+  {
+    size_t palettesize = mode_out->palettesize;
+    const unsigned char* palette = mode_out->palette;
+    size_t palsize = 1u << mode_out->bitdepth;
+    /*if the user specified output palette but did not give the values, assume
+    they want the values of the input color type (assuming that one is palette).
+    Note that we never create a new palette ourselves.*/
+    if(palettesize == 0)
+    {
+      palettesize = mode_in->palettesize;
+      palette = mode_in->palette;
+    }
+    /*only the entries that an index of this bit depth can address are used*/
+    if(palettesize < palsize) palsize = palettesize;
+    color_tree_init(&tree);
+    for(i = 0; i != palsize; ++i)
+    {
+      const unsigned char* p = &palette[i * 4];
+      color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i);
+    }
+  }
+
+  if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16)
+  {
+    for(i = 0; i != numpixels; ++i)
+    {
+      unsigned short r = 0, g = 0, b = 0, a = 0;
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+      rgba16ToPixel(out, i, mode_out, r, g, b, a);
+    }
+  }
+  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA)
+  {
+    getPixelColorsRGBA8(out, numpixels, 1, in, mode_in);
+  }
+  else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB)
+  {
+    getPixelColorsRGBA8(out, numpixels, 0, in, mode_in);
+  }
+  else
+  {
+    unsigned char r = 0, g = 0, b = 0, a = 0;
+    for(i = 0; i != numpixels; ++i)
+    {
+      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+      /*stop at the first error, but do not return from here: the color tree
+      still has to be released below*/
+      error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a);
+      if(error) break;
+    }
+  }
+
+  if(mode_out->colortype == LCT_PALETTE)
+  {
+    color_tree_cleanup(&tree);
+  }
+
+  return error; /*0 if there was no error*/
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+void lodepng_color_profile_init(LodePNGColorProfile* profile)
+{
+  /*Reset to the minimal profile: 1 bit, nothing colored/transparent seen, zero colors counted.*/
+  profile->bits = 1;
+  profile->numcolors = 0;
+  profile->key_b = profile->key_g = profile->key_r = 0;
+  profile->key = 0;
+  profile->colored = profile->alpha = 0;
+}
+
+/*function used for debug purposes with C++*/
+/*void printColorProfile(LodePNGColorProfile* p)
+{
+  std::cout << "colored: " << (int)p->colored << ", ";
+  std::cout << "key: " << (int)p->key << ", ";
+  std::cout << "key_r: " << (int)p->key_r << ", ";
+  std::cout << "key_g: " << (int)p->key_g << ", ";
+  std::cout << "key_b: " << (int)p->key_b << ", ";
+  std::cout << "alpha: " << (int)p->alpha << ", ";
+  std::cout << "numcolors: " << (int)p->numcolors << ", ";
+  std::cout << "bits: " << (int)p->bits << std::endl;
+}*/
+
+/*Smallest greyscale bit depth (1, 2, 4 or 8) that can hold this 8-bit sample exactly.*/
+static unsigned getValueRequiredBits(unsigned char value)
+{
+  /*depth-N samples scale to 8 bits by a constant step: 255 (1-bit), 85 (2-bit), 17 (4-bit)*/
+  if(value % 255 == 0) return 1;
+  if(value % 85 == 0) return 2;
+  return value % 17 == 0 ? 4 : 8;
+}
+
+/*Determines the cheapest PNG color model that can represent the image: records in profile whether
+color, an alpha channel or a color key is needed, the bit depth and, unless the samples really
+need 16 bits, up to 256 distinct colors. profile must already be initialised. Always returns 0.*/
+unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
+                                   const unsigned char* in, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode)
+{
+  size_t i;
+  ColorTree tree;
+  size_t numpixels = (size_t)w * (size_t)h; /*widen first: w * h alone is unsigned and can wrap*/
+
+  unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0;
+  unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1;
+  unsigned numcolors_done = 0;
+  unsigned bpp = lodepng_get_bpp(mode);
+  unsigned bits_done = bpp == 1 ? 1 : 0;
+  unsigned maxnumcolors = 257;
+  unsigned sixteen = 0;
+  if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256));
+
+  color_tree_init(&tree);
+
+  /*Check if the 16-bit input is truly 16-bit*/
+  if(mode->bitdepth == 16)
+  {
+    unsigned short r, g, b, a;
+    for(i = 0; i != numpixels; ++i)
+    {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
+      if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
+         (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/
+      {
+        sixteen = 1;
+        break;
+      }
+    }
+  }
+
+  if(sixteen)
+  {
+    unsigned short r = 0, g = 0, b = 0, a = 0;
+    profile->bits = 16;
+    bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/
+
+    for(i = 0; i != numpixels; ++i)
+    {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
+      if(!colored_done && (r != g || r != b)) profile->colored = colored_done = 1;
+
+      if(!alpha_done)
+      {
+        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
+        if(a != 65535 && (a != 0 || (profile->key && !matchkey)))
+        {
+          profile->alpha = 1;
+          alpha_done = 1; /*bits is already 16 in this branch, nothing to raise*/
+        }
+        else if(a == 0 && !profile->alpha && !profile->key)
+        {
+          profile->key = 1;
+          profile->key_r = r;
+          profile->key_g = g;
+          profile->key_b = b;
+        }
+        else if(a == 65535 && profile->key && matchkey)
+        {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          profile->alpha = 1;
+          alpha_done = 1;
+        }
+      }
+
+      if(alpha_done && numcolors_done && colored_done && bits_done) break;
+    }
+    /*The loop above only notices an opaque pixel in the key color if it comes after the transparent
+    one. Rescan so that an earlier one also forces a real alpha channel instead of a color key.*/
+    for(i = 0; !alpha_done && profile->key && i != numpixels; ++i)
+    {
+      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode);
+      if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) profile->alpha = alpha_done = 1;
+    }
+  }
+  else /* < 16-bit */
+  {
+    for(i = 0; i != numpixels; ++i)
+    {
+      unsigned char r = 0, g = 0, b = 0, a = 0;
+      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
+      if(!bits_done && profile->bits < 8)
+      {
+        /*only r is checked, < 8 bits is only relevant for greyscale*/
+        unsigned bits = getValueRequiredBits(r);
+        if(bits > profile->bits) profile->bits = bits;
+      }
+      bits_done = (profile->bits >= bpp);
+      if(!colored_done && (r != g || r != b))
+      {
+        profile->colored = 1;
+        colored_done = 1;
+        if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/
+      }
+
+      if(!alpha_done)
+      {
+        unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b);
+        if(a != 255 && (a != 0 || (profile->key && !matchkey)))
+        {
+          profile->alpha = 1;
+          alpha_done = 1;
+          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        }
+        else if(a == 0 && !profile->alpha && !profile->key)
+        {
+          profile->key = 1;
+          profile->key_r = r;
+          profile->key_g = g;
+          profile->key_b = b;
+        }
+        else if(a == 255 && profile->key && matchkey)
+        {
+          /* Color key cannot be used if an opaque pixel also has that RGB color. */
+          profile->alpha = 1;
+          alpha_done = 1;
+          if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+        }
+      }
+
+      if(!numcolors_done && !color_tree_has(&tree, r, g, b, a))
+      {
+        color_tree_add(&tree, r, g, b, a, profile->numcolors);
+        if(profile->numcolors < 256)
+        {
+          unsigned char* p = &profile->palette[profile->numcolors * 4]; /*RGBA slot of the new color*/
+          p[0] = r; p[1] = g; p[2] = b; p[3] = a;
+        }
+        ++profile->numcolors;
+        numcolors_done = profile->numcolors >= maxnumcolors;
+      }
+      if(alpha_done && numcolors_done && colored_done && bits_done) break;
+    }
+
+    /*same rescan as for 16-bit: an opaque key-colored pixel anywhere rules out the color key*/
+    for(i = 0; !alpha_done && profile->key && i != numpixels; ++i)
+    {
+      unsigned char r = 0, g = 0, b = 0, a = 0;
+      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode);
+      if(a == 0 || r != profile->key_r || g != profile->key_g || b != profile->key_b) continue;
+      profile->alpha = alpha_done = 1;
+      if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+    }
+    /*make the profile's key always 16-bit for consistency - repeat each byte twice*/
+    profile->key_r += (profile->key_r << 8);
+    profile->key_g += (profile->key_g << 8);
+    profile->key_b += (profile->key_b << 8);
+  }
+
+  color_tree_cleanup(&tree);
+  return 0;
+}
+
+/*Automatically chooses the color type that gives the smallest amount of bits in the
+output image, e.g. grey if there are only greyscale pixels, palette if there
+are at most 256 colors, ...
+mode_out should contain the user chosen color model, but is overwritten with the
+potentially smaller one chosen here. Returns 0 on success, else a lodepng error code.*/
+unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
+                                   const unsigned char* image, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in)
+{
+  LodePNGColorProfile prof;
+  unsigned error = 0;
+  unsigned i, n, palettebits, grey_ok, palette_ok;
+  size_t numpixels = (size_t)w * (size_t)h; /*widen first: w * h alone is unsigned and can wrap*/
+
+  lodepng_color_profile_init(&prof);
+  error = lodepng_get_color_profile(&prof, image, w, h, mode_in);
+  if(error) return error;
+  mode_out->key_defined = 0;
+
+  if(prof.key && numpixels <= 16)
+  {
+    prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/
+    if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/
+  }
+  grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/
+  n = prof.numcolors;
+  palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8));
+  palette_ok = n <= 256 && prof.bits <= 8 && (size_t)n * 2 < numpixels; /*no palette overhead for tiny images*/
+  if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/
+
+  if(palette_ok)
+  {
+    unsigned char* p = prof.palette;
+    lodepng_palette_clear(mode_out); /*remove potential earlier palette*/
+    for(i = 0; i != prof.numcolors; ++i)
+    {
+      error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]);
+      if(error) break;
+    }
+
+    mode_out->colortype = LCT_PALETTE;
+    mode_out->bitdepth = palettebits;
+
+    if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize
+        && mode_in->bitdepth == mode_out->bitdepth)
+    {
+      /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/
+      lodepng_color_mode_cleanup(mode_out);
+      error = lodepng_color_mode_copy(mode_out, mode_in); /*mode_out is replaced entirely: report this result*/
+    }
+  }
+  else /*8-bit or 16-bit per channel*/
+  {
+    mode_out->bitdepth = prof.bits;
+    mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA)
+                                     : (prof.colored ? LCT_RGB : LCT_GREY);
+
+    if(prof.key && !prof.alpha)
+    {
+      unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/
+      mode_out->key_r = prof.key_r & mask;
+      mode_out->key_g = prof.key_g & mask;
+      mode_out->key_b = prof.key_b & mask;
+      mode_out->key_defined = 1;
+    }
+  }
+
+  return error;
+}
+
+#endif /* #ifdef LODEPNG_COMPILE_ENCODER */
+
+/*
+Paeth predictor, used by PNG filter type 4.
+a, b and c are the left, upper and upper-left neighbour bytes; they are passed as
+short only so that the differences below cannot wrap around.
+*/
+static unsigned char paethPredictor(short a, short b, short c)
+{
+  /*distance of the linear estimate a + b - c to each of the three neighbours*/
+  const short dist_a = abs(b - c);
+  const short dist_b = abs(a - c);
+  const short dist_c = abs(a + b - c - c);
+  /*strict comparisons: ties resolve to a first, then b, then c*/
+  if(dist_c < dist_a && dist_c < dist_b) return (unsigned char)c;
+  return (unsigned char)(dist_b < dist_a ? b : a);
+}
+
+/*shared values used by multiple Adam7 related functions*/
+
+static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x of the first pixel of each pass*/
+static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y of the first pixel of each pass*/
+static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x distance between pixels of a pass*/
+static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y distance between pixels of a pass*/
+
+/*
+Outputs various dimensions and positions in the image related to the Adam7 reduced images.
+passw: output containing the width of the 7 passes
+passh: output containing the height of the 7 passes
+filter_passstart: output containing the byte index of the start and end of each
+ reduced image with filter bytes
+padded_passstart: output containing the byte index of the start and end of each
+ reduced image when without filter bytes but with padded scanlines
+passstart: output containing the byte index of the start and end of each reduced
+ image without padding between scanlines, but still padding between the images
+w, h: width and height of non-interlaced image
+bpp: bits per pixel
+"padded" is only relevant if bpp is less than 8 and a scanline or image does not
+ end at a full byte. All byte indices are computed in size_t, not in unsigned.
+*/
+static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8],
+                                size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp)
+{
+  /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/
+  unsigned i;
+
+  /*calculate width and height in pixels of each pass*/
+  for(i = 0; i != 7; ++i)
+  {
+    passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i];
+    passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i];
+    if(passw[i] == 0) passh[i] = 0;
+    if(passh[i] == 0) passw[i] = 0;
+  }
+
+  filter_passstart[0] = padded_passstart[0] = passstart[0] = 0;
+  for(i = 0; i != 7; ++i)
+  {
+    size_t linebytes = ((size_t)passw[i] * bpp + 7) / 8; /*bytes per scanline, padded to a full byte*/
+    /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/
+    filter_passstart[i + 1] = filter_passstart[i] + ((passw[i] && passh[i]) ? passh[i] * (1 + linebytes) : 0);
+    /*bits padded if needed to fill full byte at end of each scanline*/
+    padded_passstart[i + 1] = padded_passstart[i] + passh[i] * linebytes;
+    /*only padded at end of reduced image*/
+    passstart[i + 1] = passstart[i] + ((size_t)passh[i] * passw[i] * bpp + 7) / 8;
+  }
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Decoder                                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*Reads the PNG signature and IHDR chunk at the start of in into *w, *h and state->info_png. Return value is error*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state,
+                         const unsigned char* in, size_t insize)
+{
+  LodePNGInfo* info = &state->info_png;
+  if(insize == 0 || in == 0)
+  {
+    CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/
+  }
+  if(insize < 33) /*8 signature bytes + 25 bytes of IHDR chunk: length (4), type (4), data (13), CRC (4)*/
+  {
+    CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/
+  }
+
+  /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/
+  lodepng_info_cleanup(info);
+  lodepng_info_init(info);
+
+  if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71
+     || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10)
+  {
+    CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/
+  }
+  if(lodepng_chunk_length(in + 8) != 13)
+  {
+    CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/
+  }
+  if(!lodepng_chunk_type_equals(in + 8, "IHDR"))
+  {
+    CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/
+  }
+
+  /*read the values given in the header; the chunk data starts at byte 16 (after signature, length and type)*/
+  *w = lodepng_read32bitInt(&in[16]);
+  *h = lodepng_read32bitInt(&in[20]);
+  info->color.bitdepth = in[24];
+  info->color.colortype = (LodePNGColorType)in[25];
+  info->compression_method = in[26];
+  info->filter_method = in[27];
+  info->interlace_method = in[28];
+
+  if(*w == 0 || *h == 0)
+  {
+    CERROR_RETURN_ERROR(state->error, 93); /*error: width or height is 0*/
+  }
+
+  if(!state->decoder.ignore_crc)
+  {
+    unsigned CRC = lodepng_read32bitInt(&in[29]); /*stored right after the 13 data bytes*/
+    unsigned checksum = lodepng_crc32(&in[12], 17); /*computed over chunk type (4) + data (13)*/
+    if(CRC != checksum)
+    {
+      CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/
+    }
+  }
+
+  /*error: only compression method 0 is allowed in the specification*/
+  if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32);
+  /*error: only filter method 0 is allowed in the specification*/
+  if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33);
+  /*error: only interlace methods 0 and 1 exist in the specification*/
+  if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34);
+
+  state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); /*last check: its result is the result*/
+  return state->error;
+}
+
+static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon,
+                                 size_t bytewidth, unsigned char filterType, size_t length)
+{
+  /*
+  Reverses one PNG filter (filter method 0) on a single scanline of length bytes.
+  recon receives the result, scanline is the filtered input WITHOUT its filtertype byte (that
+  one is passed in filterType), precon is the previous unfiltered scanline or 0 for the first.
+  bytewidth is the distance in bytes to the matching byte of the pixel to the left (1 when
+  pixels are smaller than a byte). Returns 0, or 36 for an unknown filter type.
+  recon and scanline MAY be the same memory address! precon must be disjoint.
+  */
+
+  size_t i;
+  switch(filterType)
+  {
+    case 0: /*None: the bytes are stored as they are*/
+      for(i = 0; i != length; ++i) recon[i] = scanline[i];
+      break;
+    case 1: /*Sub: add the already reconstructed byte bytewidth positions to the left*/
+      for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+      for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth];
+      break;
+    case 2: /*Up: add the byte above; without a previous line that byte counts as 0*/
+      if(precon)
+      {
+        for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i];
+      }
+      else
+      {
+        for(i = 0; i != length; ++i) recon[i] = scanline[i];
+      }
+      break;
+    case 3: /*Average: add the rounded-down mean of the left and upper bytes (missing ones count as 0)*/
+      if(precon)
+      {
+        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1);
+        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1);
+      }
+      else
+      {
+        for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i];
+        for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1);
+      }
+      break;
+    case 4: /*Paeth: add whichever of left, upper, upper-left paethPredictor selects*/
+      if(precon)
+      {
+        for(i = 0; i != bytewidth; ++i)
+        {
+          recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/
+        }
+        for(i = bytewidth; i < length; ++i)
+        {
+          recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth]));
+        }
+      }
+      else
+      {
+        for(i = 0; i != bytewidth; ++i)
+        {
+          recon[i] = scanline[i];
+        }
+        for(i = bytewidth; i < length; ++i)
+        {
+          /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/
+          recon[i] = (scanline[i] + recon[i - bytewidth]);
+        }
+      }
+      break;
+    default: return 36; /*error: unexisting filter type given*/
+  }
+  return 0;
+}
+
+static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
+{
+  /*
+  For PNG filter method 0
+  this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times)
+  out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline
+  w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel
+  in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes)
+  returns 0, or the error of unfilterScanline for the first scanline that fails
+  */
+  unsigned y;
+  unsigned char* prevline = 0;
+
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7) / 8;
+  size_t linebytes = ((size_t)w * bpp + 7) / 8; /*widen first: w * bpp alone is unsigned and can wrap*/
+
+  for(y = 0; y < h; ++y)
+  {
+    size_t outindex = linebytes * y;
+    size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+    unsigned char filterType = in[inindex];
+
+    CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes));
+
+    prevline = &out[outindex];
+  }
+
+  return 0;
+}
+
+/*
+Moves the pixels of the 7 Adam7 reduced images to their final place in the full image.
+in: Adam7 interlaced image, with no padding bits between scanlines, but between
+ reduced images so that each reduced image starts at a byte.
+out: the same pixels, re-ordered into a non-interlaced image of w * h * bpp bits (bpp = bits per pixel)
+in is possibly bigger due to padding bits between reduced images.
+out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation
+(because that's likely a little bit faster)
+Offsets are computed in size_t so that they do not wrap in unsigned arithmetic for large images.
+NOTE: comments about padding bits are only relevant if bpp < 8
+*/
+static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
+{
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned i;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  if(bpp >= 8)
+  {
+    for(i = 0; i != 7; ++i)
+    {
+      unsigned x, y, b;
+      size_t bytewidth = bpp / 8;
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x)
+      {
+        size_t pixelinstart = passstart[i] + ((size_t)y * passw[i] + x) * bytewidth;
+        size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * w + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth;
+        for(b = 0; b < bytewidth; ++b)
+        {
+          out[pixeloutstart + b] = in[pixelinstart + b];
+        }
+      }
+    }
+  }
+  else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
+  {
+    for(i = 0; i != 7; ++i)
+    {
+      unsigned x, y, b;
+      size_t ilinebits = (size_t)bpp * passw[i]; /*bits per scanline of this reduced image*/
+      size_t olinebits = (size_t)bpp * w; /*bits per scanline of the full image*/
+      size_t obp, ibp; /*bit pointers (for out and in buffer)*/
+      for(y = 0; y < passh[i]; ++y)
+      for(x = 0; x < passw[i]; ++x)
+      {
+        ibp = (8 * passstart[i]) + (y * ilinebits + (size_t)x * bpp);
+        obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (size_t)(ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
+        for(b = 0; b < bpp; ++b)
+        {
+          unsigned char bit = readBitFromReversedStream(&ibp, in);
+          /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/
+          setBitOfReversedStream0(&obp, out, bit);
+        }
+      }
+    }
+  }
+}
+
+static void removePaddingBits(unsigned char* out, const unsigned char* in,
+                              size_t olinebits, size_t ilinebits, unsigned h)
+{
+  /*
+  Copies h scanlines of olinebits bits each from in to out, skipping the ilinebits - olinebits
+  padding bits that follow every input scanline, so that the output lines are packed back to back.
+  Needed after unfiltering when a scanline does not end on a byte boundary (only useful when the
+  difference is in the range 1..7); also used to move the reduced images of Adam7 together.
+  in and out are allowed to be the same buffer, in may also be higher but still overlapping;
+  in must have >= ilinebits*h bits, out must have >= olinebits*h bits,
+  and olinebits must be <= ilinebits.
+  */
+  const size_t skipbits = ilinebits - olinebits;
+  size_t readpos = 0, writepos = 0; /*bit positions in the in and out buffers*/
+  unsigned row;
+  for(row = 0; row != h; ++row)
+  {
+    size_t left = olinebits; /*bits of this scanline still to copy*/
+    while(left != 0)
+    {
+      unsigned char bit = readBitFromReversedStream(&readpos, in);
+      setBitOfReversedStream(&writepos, out, bit);
+      --left;
+    }
+    readpos += skipbits;
+  }
+}
+
+/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from
+the IDAT chunks (with filter index bytes and possible padding bits)
+return value is error (0 = ok, 31 = invalid colortype, or an error passed on from unfilter)*/
+static unsigned postProcessScanlines(unsigned char* out, unsigned char* in,
+                                     unsigned w, unsigned h, const LodePNGInfo* info_png)
+{
+  /*
+  This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype.
+  Steps:
+  *) if no Adam7: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8)
+  *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace
+  NOTE: the in buffer will be overwritten with intermediate data!
+  */
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+  if(bpp == 0) return 31; /*error: invalid colortype*/
+
+  if(info_png->interlace_method == 0)
+  {
+    if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) /*scanlines do not end on a byte boundary*/
+    {
+      CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); /*unfilter in place, then pack the lines into out*/
+      removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h);
+    }
+    /*we can immediately filter into the out buffer, no other steps needed*/
+    else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp));
+  }
+  else /*interlace_method is 1 (Adam7)*/
+  {
+    unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned i;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    for(i = 0; i != 7; ++i)
+    {
+      /*each reduced image is unfiltered within in: read at filter_passstart, written at padded_passstart*/
+      CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp));
+      /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline,
+      move bytes instead of bits or move not at all*/
+      if(bpp < 8)
+      {
+        /*remove padding bits in scanlines; after this there still may be padding
+        bits between the different reduced images: each reduced image still starts nicely at a byte*/
+        removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp,
+                          ((passw[i] * bpp + 7) / 8) * 8, passh[i]);
+      }
+    }
+    Adam7_deinterlace(out, in, w, h, bpp);
+  }
+
+  return 0;
+}
+
+/*Reads a PLTE chunk into color->palette as RGBA with alpha 255. Returns 0, or error 38 or 83 (see below).*/
+static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
+{
+  unsigned pos = 0, i;
+  if(chunkLength / 3 > 256) return 38; /*error: palette too big; checked first so nothing is freed or allocated*/
+  if(color->palette) lodepng_free(color->palette);
+  color->palettesize = chunkLength / 3;
+  color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize);
+  if(!color->palette && color->palettesize)
+  {
+    color->palettesize = 0;
+    return 83; /*alloc fail*/
+  }
+
+  for(i = 0; i != color->palettesize; ++i)
+  {
+    color->palette[4 * i + 0] = data[pos++]; /*R*/
+    color->palette[4 * i + 1] = data[pos++]; /*G*/
+    color->palette[4 * i + 2] = data[pos++]; /*B*/
+    color->palette[4 * i + 3] = 255; /*alpha*/
+  }
+  return 0; /* OK */
+}
+
+/*Reads a tRNS chunk: alpha values for palette entries, or the transparent color key of a grey or
+RGB image. Returns 0 if ok, else error 38, 30, 41 or 42 as explained at each return below.*/
+static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength)
+{
+  size_t n;
+  switch(color->colortype)
+  {
+    case LCT_PALETTE: /*one alpha byte for each of the first chunkLength palette entries*/
+      if(chunkLength > color->palettesize) return 38; /*error: more alpha values given than palette entries*/
+      for(n = 0; n != chunkLength; ++n) color->palette[4 * n + 3] = data[n];
+      break;
+
+    case LCT_GREY: /*one 2-byte value, most significant byte first, used for all three key channels*/
+      if(chunkLength != 2) return 30; /*error: this chunk must be 2 bytes for greyscale image*/
+      color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1];
+      color->key_defined = 1;
+      break;
+
+    case LCT_RGB: /*three 2-byte values, most significant byte first: key R, G and B*/
+      if(chunkLength != 6) return 41; /*error: this chunk must be 6 bytes for RGB image*/
+      color->key_r = 256u * data[0] + data[1];
+      color->key_g = 256u * data[2] + data[3];
+      color->key_b = 256u * data[4] + data[5];
+      color->key_defined = 1;
+      break;
+
+    default:
+      return 42; /*error: tRNS chunk not allowed for other color models*/
+  }
+
+  return 0; /* OK */
+}
+
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*background color chunk (bKGD): a palette index, a grey value or an RGB triple depending on the
+image's color type. Returns 0 if ok, or error 43, 44 or 45 when the chunk has the wrong length.*/
+static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
+{
+  switch(info->color.colortype)
+  {
+    case LCT_PALETTE:
+      if(chunkLength != 1) return 43; /*error: this chunk must be 1 byte for indexed color image*/
+      info->background_r = info->background_g = info->background_b = data[0];
+      break;
+
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      if(chunkLength != 2) return 44; /*error: this chunk must be 2 bytes for greyscale image*/
+      info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1];
+      break;
+
+    case LCT_RGB:
+    case LCT_RGBA:
+      if(chunkLength != 6) return 45; /*error: this chunk must be 6 bytes for RGB image*/
+      info->background_r = 256u * data[0] + data[1];
+      info->background_g = 256u * data[2] + data[3];
+      info->background_b = 256u * data[4] + data[5];
+      break;
+
+    default:
+      return 0; /*any other color type: the chunk is ignored and no background is recorded*/
+  }
+
+  info->background_defined = 1;
+  return 0; /* OK */
+}
+
+/*text chunk (tEXt): keyword, a null byte, then the text up to the end of the chunk. Returns 0 or an error code*/
+static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
+{
+  unsigned error = 0;
+  char *key = 0, *str = 0;
+  unsigned i;
+
+  while(!error) /*not really a while loop, only used to break on error*/
+  {
+    unsigned length, string2_begin;
+
+    length = 0;
+    while(length < chunkLength && data[length] != 0) ++length; /*length of the keyword*/
+    /*even though it's not allowed by the standard, no error is thrown if
+    there's no null termination char, if the text is empty*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    string2_begin = length + 1; /*skip keyword null terminator*/
+
+    /*the text is everything after the terminator; empty if the chunk already ended*/
+    length = chunkLength < string2_begin ? 0 : (unsigned int)(chunkLength - string2_begin);
+    str = (char*)lodepng_malloc(length + 1);
+    if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    str[length] = 0;
+    for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i];
+
+    error = lodepng_add_text(info, key, str);
+
+    break;
+  }
+
+  lodepng_free(key); /*both temporaries are released on every path; presumably lodepng_add_text copies them*/
+  lodepng_free(str);
+
+  return error;
+}
+
+/*compressed text chunk (zTXt): keyword, null byte, compression method byte (must be 0), zlib data. Returns 0 or error*/
+static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength)
+{
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, string2_begin;
+  char *key = 0;
+  ucvector decoded;
+
+  ucvector_init(&decoded);
+
+  while(!error) /*not really a while loop, only used to break on error*/
+  {
+    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+    if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+    string2_begin = length + 2;
+    if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
+
+    length = (unsigned int)(chunkLength - string2_begin);
+    /*will fail if zlib error, e.g. if length is too small*/
+    error = zlib_decompress(&decoded.data, &decoded.size,
+                            (unsigned char*)(&data[string2_begin]),
+                            length, zlibsettings);
+    if(error) break;
+    /*the decoded text is used as a C string below, so it must not be passed on unterminated*/
+    if(!ucvector_push_back(&decoded, 0)) CERROR_BREAK(error, 83); /*alloc fail*/
+    error = lodepng_add_text(info, key, (char*)decoded.data);
+
+    break;
+  }
+
+  lodepng_free(key);
+  ucvector_cleanup(&decoded);
+
+  return error;
+}
+
+/*
+international text chunk (iTXt)
+Chunk layout as parsed below: keyword (1 to 79 bytes), a 0 separator, a compression
+flag byte, a compression method byte that must be 0, a 0-terminated language tag,
+a 0-terminated translated keyword, then the text (a zlib stream when the
+compression flag is nonzero, raw bytes otherwise). All four strings are passed to
+lodepng_add_itext.
+Returns 0 on success, otherwise an error code: 30 (chunk shorter than 5 bytes),
+75 (chunk too short for the fixed fields), 89 (keyword length out of range),
+72 (compression method not 0), 83 (allocation failure) or whatever
+zlib_decompress / lodepng_add_itext return.
+*/
+static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+                               const unsigned char* data, size_t chunkLength)
+{
+  unsigned error = 0;
+  unsigned i;
+
+  unsigned length, begin, compressed;
+  char *key = 0, *langtag = 0, *transkey = 0;
+  ucvector decoded;
+  ucvector_init(&decoded);
+
+  while(!error) /*not really a while loop, only used to break on error*/
+  {
+    /*Quick check if the chunk length isn't too small. Even without check
+    it'd still fail with other error checks below if it's too short. This just gives a different error code.*/
+    if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/
+
+    /*read the key*/
+    for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
+    if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/
+    if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/
+
+    key = (char*)lodepng_malloc(length + 1);
+    if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    key[length] = 0;
+    for(i = 0; i != length; ++i) key[i] = (char)data[i];
+
+    /*read the compression method*/
+    compressed = data[length + 1];
+    if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
+
+    /*even though it's not allowed by the standard, no error is thrown if
+    there's no null termination char, if the text is empty for the next 3 texts*/
+
+    /*read the langtag*/
+    begin = length + 3;
+    length = 0;
+    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+    langtag = (char*)lodepng_malloc(length + 1);
+    if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    langtag[length] = 0;
+    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
+
+    /*read the transkey*/
+    begin += length + 1;
+    length = 0;
+    for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length;
+
+    transkey = (char*)lodepng_malloc(length + 1);
+    if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
+
+    transkey[length] = 0;
+    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
+
+    /*read the actual text*/
+    begin += length + 1;
+
+    /*begin can lie past the chunk end when terminators are missing; the text is then empty*/
+    length = chunkLength < begin ? 0 : (unsigned int)(chunkLength - begin);
+
+    if(compressed)
+    {
+      /*will fail if zlib error, e.g. if length is too small*/
+      error = zlib_decompress(&decoded.data, &decoded.size,
+                              (unsigned char*)(&data[begin]),
+                              length, zlibsettings);
+      if(error) break;
+      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
+      /*the text is consumed as a C string below, so a failed terminator append must not go unnoticed*/
+      if(!ucvector_push_back(&decoded, 0)) CERROR_BREAK(error, 83); /*alloc fail*/
+    }
+    else
+    {
+      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
+
+      decoded.data[length] = 0;
+      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
+    }
+
+    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data);
+
+    break;
+  }
+
+  lodepng_free(key);
+  lodepng_free(langtag);
+  lodepng_free(transkey);
+  ucvector_cleanup(&decoded);
+
+  return error;
+}
+
+static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
+{
+  /*
+  Reads the 7 payload bytes of a tIME chunk into info->time: a 2-byte big endian
+  year followed by one byte each for month, day, hour, minute and second.
+  Returns 0 on success, 73 when the chunk is not exactly 7 bytes long.
+  */
+  if(chunkLength == 7)
+  {
+    info->time.year = ((unsigned)data[0] << 8) + data[1];
+    info->time.month = data[2];
+    info->time.day = data[3];
+    info->time.hour = data[4];
+    info->time.minute = data[5];
+    info->time.second = data[6];
+    info->time_defined = 1;
+
+    return 0; /* OK */
+  }
+
+  return 73; /*invalid tIME chunk size*/
+}
+
+static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength)
+{
+  /*
+  Reads the 9 payload bytes of a pHYs chunk: two 4-byte big endian values stored
+  in phys_x and phys_y, followed by a single unit byte stored in phys_unit.
+  Returns 0 on success, 74 when the chunk is not exactly 9 bytes long.
+  */
+  if(chunkLength == 9)
+  {
+    info->phys_x = ((unsigned)data[0] << 24) + ((unsigned)data[1] << 16) + ((unsigned)data[2] << 8) + data[3];
+    info->phys_y = ((unsigned)data[4] << 24) + ((unsigned)data[5] << 16) + ((unsigned)data[6] << 8) + data[7];
+    info->phys_unit = data[8];
+    info->phys_defined = 1;
+
+    return 0; /* OK */
+  }
+
+  return 74; /*invalid pHYs chunk size*/
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+read a PNG, the result will be in the same color type as the PNG (hence "generic")
+Steps, as implemented below:
+- lodepng_inspect reads the header and fills *w, *h and state->info_png
+- the chunk list is walked until IEND; IDAT payloads are concatenated, the other
+  known chunks are handed to their readChunk_* function
+- the concatenated IDAT data is decompressed and its size is checked against the
+  size predicted from the image dimensions
+- postProcessScanlines turns the scanlines into the final pixel buffer
+The result is reported through state->error (0 means success). On success *out
+points to a buffer obtained from lodepng_malloc; *out is set to 0 at the start and
+stays 0 when the output allocation fails.
+*/
+static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h,
+                          LodePNGState* state,
+                          const unsigned char* in, size_t insize)
+{
+  unsigned char IEND = 0;
+  const unsigned char* chunk;
+  size_t i;
+  ucvector idat; /*the data from idat chunks*/
+  ucvector scanlines;
+  size_t predict;
+  size_t numpixels;
+
+  /*for unknown chunk order*/
+  unsigned unknown = 0;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+  /*provide some proper output values if error will happen*/
+  *out = 0;
+
+  state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/
+  if(state->error) return;
+
+  numpixels = *w * *h;
+
+  /*multiplication overflow*/
+  if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92);
+  /*multiplication overflow possible further below. Allows up to 2^31-1 pixel
+  bytes with 16-bit RGBA, the rest is room for filter bytes.*/
+  if(numpixels > 268435455) CERROR_RETURN(state->error, 92);
+
+  ucvector_init(&idat);
+  chunk = &in[33]; /*first byte of the first chunk after the header*/
+
+  /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
+  IDAT data is put at the start of the in buffer*/
+  while(!IEND && !state->error)
+  {
+    unsigned chunkLength;
+    const unsigned char* data; /*the data in the chunk*/
+
+    /*error: size of the in buffer too small to contain next chunk*/
+    if((size_t)((chunk - in) + 12) > insize || chunk < in) CERROR_BREAK(state->error, 30);
+
+    /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/
+    chunkLength = lodepng_chunk_length(chunk);
+    /*error: chunk length larger than the max PNG chunk size*/
+    if(chunkLength > 2147483647) CERROR_BREAK(state->error, 63);
+
+    if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in)
+    {
+      CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/
+    }
+
+    data = lodepng_chunk_data_const(chunk);
+
+    /*IDAT chunk, containing compressed image data*/
+    if(lodepng_chunk_type_equals(chunk, "IDAT"))
+    {
+      size_t oldsize = idat.size;
+      if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
+      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i];
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      critical_pos = 3;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    }
+    /*IEND chunk*/
+    else if(lodepng_chunk_type_equals(chunk, "IEND"))
+    {
+      IEND = 1;
+    }
+    /*palette chunk (PLTE)*/
+    else if(lodepng_chunk_type_equals(chunk, "PLTE"))
+    {
+      state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength);
+      if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      critical_pos = 2;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    }
+    /*palette transparency chunk (tRNS)*/
+    else if(lodepng_chunk_type_equals(chunk, "tRNS"))
+    {
+      state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength);
+      if(state->error) break;
+    }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*background color chunk (bKGD)*/
+    else if(lodepng_chunk_type_equals(chunk, "bKGD"))
+    {
+      state->error = readChunk_bKGD(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    }
+    /*text chunk (tEXt)*/
+    else if(lodepng_chunk_type_equals(chunk, "tEXt"))
+    {
+      if(state->decoder.read_text_chunks)
+      {
+        state->error = readChunk_tEXt(&state->info_png, data, chunkLength);
+        if(state->error) break;
+      }
+    }
+    /*compressed text chunk (zTXt)*/
+    else if(lodepng_chunk_type_equals(chunk, "zTXt"))
+    {
+      if(state->decoder.read_text_chunks)
+      {
+        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        if(state->error) break;
+      }
+    }
+    /*international text chunk (iTXt)*/
+    else if(lodepng_chunk_type_equals(chunk, "iTXt"))
+    {
+      if(state->decoder.read_text_chunks)
+      {
+        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        if(state->error) break;
+      }
+    }
+    /*time stamp chunk (tIME)*/
+    else if(lodepng_chunk_type_equals(chunk, "tIME"))
+    {
+      state->error = readChunk_tIME(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    }
+    /*physical dimensions chunk (pHYs)*/
+    else if(lodepng_chunk_type_equals(chunk, "pHYs"))
+    {
+      state->error = readChunk_pHYs(&state->info_png, data, chunkLength);
+      if(state->error) break;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    else /*it's not an implemented chunk type, so ignore it: skip over the data*/
+    {
+      /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/
+      if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69);
+
+      /*NOTE(review): unknown is never reset, so once one unknown chunk has been seen the CRC
+      check below is skipped for every later chunk as well - confirm this is intended*/
+      unknown = 1;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+      if(state->decoder.remember_unknown_chunks)
+      {
+        state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1],
+                                            &state->info_png.unknown_chunks_size[critical_pos - 1], chunk);
+        if(state->error) break;
+      }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    }
+
+    if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/
+    {
+      if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
+    }
+
+    if(!IEND) chunk = lodepng_chunk_next_const(chunk);
+  }
+
+  ucvector_init(&scanlines);
+  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
+  If the decompressed size does not match the prediction, the image must be corrupt.*/
+  if(state->info_png.interlace_method == 0)
+  {
+    /*The extra *h is added because these are the filter bytes every scanline starts with*/
+    predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h;
+  }
+  else
+  {
+    /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/
+    const LodePNGColorMode* color = &state->info_png.color;
+    predict = 0;
+    predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
+    if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3);
+    predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3);
+    if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2);
+    predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2);
+    if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1);
+    predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1);
+  }
+  if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/
+  if(!state->error)
+  {
+    state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data,
+                                   idat.size, &state->decoder.zlibsettings);
+    if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/
+  }
+  ucvector_cleanup(&idat);
+
+  if(!state->error)
+  {
+    size_t outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
+    *out = (unsigned char*)lodepng_malloc(outsize);
+    if(!*out) state->error = 83; /*alloc fail*/
+    else
+    {
+      /*only touch the buffer once the allocation is known to have succeeded*/
+      for(i = 0; i < outsize; i++) (*out)[i] = 0;
+      state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png);
+    }
+  }
+  ucvector_cleanup(&scanlines);
+}
+
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize)
+{
+  /*
+  Decodes the PNG in "in" with decodeGeneric and, when state->decoder.color_convert
+  is set and state->info_raw differs from the PNG's own color mode, converts the
+  pixels to state->info_raw. Returns the error code (0 means success).
+  */
+  unsigned char* decoded; /*the pixels in the color mode of the PNG itself*/
+  size_t convertedsize;
+
+  *out = 0;
+  decodeGeneric(out, w, h, state, in, insize);
+  if(state->error) return state->error;
+
+  if(!state->decoder.color_convert)
+  {
+    /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype
+    the raw image has to the end user*/
+    state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color);
+    return state->error;
+  }
+
+  /*same color type, no copying or converting of data needed*/
+  if(lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) return state->error;
+
+  /*TODO: check if this works according to the statement in the documentation: "The converter can convert
+  from greyscale input color type, to 8-bit greyscale or greyscale with alpha"*/
+  if(state->info_raw.colortype != LCT_RGB && state->info_raw.colortype != LCT_RGBA
+     && state->info_raw.bitdepth != 8)
+  {
+    return 56; /*unsupported color mode conversion*/
+  }
+
+  /*color conversion needed; sort of copy of the data*/
+  decoded = *out;
+  convertedsize = lodepng_get_raw_size(*w, *h, &state->info_raw);
+  *out = (unsigned char*)lodepng_malloc(convertedsize);
+  if(*out)
+  {
+    state->error = lodepng_convert(*out, decoded, &state->info_raw,
+                                   &state->info_png.color, *w, *h);
+  }
+  else
+  {
+    state->error = 83; /*alloc fail*/
+  }
+  /*the unconverted pixels are no longer needed in either case*/
+  lodepng_free(decoded);
+
+  return state->error;
+}
+
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in,
+                               size_t insize, LodePNGColorType colortype, unsigned bitdepth)
+{
+  /*
+  Convenience wrapper around lodepng_decode: uses a temporary, default-initialized
+  LodePNGState whose info_raw is set to the requested colortype and bitdepth.
+  Returns the error code of lodepng_decode.
+  */
+  LodePNGState tempstate;
+  unsigned result;
+
+  lodepng_state_init(&tempstate);
+  tempstate.info_raw.bitdepth = bitdepth;
+  tempstate.info_raw.colortype = colortype;
+
+  result = lodepng_decode(out, w, h, &tempstate, in, insize);
+
+  lodepng_state_cleanup(&tempstate);
+  return result;
+}
+
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
+{
+  /*lodepng_decode_memory with the output fixed to RGBA at 8 bits per channel*/
+  const LodePNGColorType colortype = LCT_RGBA;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_memory(out, w, h, in, insize, colortype, bitdepth);
+}
+
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize)
+{
+  /*lodepng_decode_memory with the output fixed to RGB at 8 bits per channel*/
+  const LodePNGColorType colortype = LCT_RGB;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_memory(out, w, h, in, insize, colortype, bitdepth);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth)
+{
+  /*
+  Loads the whole file with lodepng_load_file and decodes it with
+  lodepng_decode_memory. Returns the first error code encountered (0 means success).
+  */
+  /*initialized so the unconditional lodepng_free below never sees an indeterminate
+  pointer, whatever lodepng_load_file does with its output arguments on failure*/
+  unsigned char* buffer = 0;
+  size_t buffersize = 0;
+  unsigned error;
+  error = lodepng_load_file(&buffer, &buffersize, filename);
+  if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth);
+  lodepng_free(buffer);
+  return error;
+}
+
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
+{
+  /*lodepng_decode_file with the output fixed to RGBA at 8 bits per channel*/
+  const LodePNGColorType colortype = LCT_RGBA;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_file(out, w, h, filename, colortype, bitdepth);
+}
+
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename)
+{
+  /*lodepng_decode_file with the output fixed to RGB at 8 bits per channel*/
+  const LodePNGColorType colortype = LCT_RGB;
+  const unsigned bitdepth = 8;
+  return lodepng_decode_file(out, w, h, filename, colortype, bitdepth);
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings)
+{
+  /*
+  Fills in the decoder defaults: color conversion on, CRC checking on, and, when
+  ancillary chunk support is compiled in, text chunks read and unknown chunks
+  not remembered. The nested zlib settings get their own defaults.
+  */
+  lodepng_decompress_settings_init(&settings->zlibsettings);
+  settings->ignore_crc = 0;
+  settings->color_convert = 1;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  settings->remember_unknown_chunks = 0;
+  settings->read_text_chunks = 1;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+
+void lodepng_state_init(LodePNGState* state)
+{
+  /*
+  Puts every part of the state in its initial condition. The error field starts
+  at 1 rather than 0.
+  */
+  state->error = 1;
+  lodepng_info_init(&state->info_png);
+  lodepng_color_mode_init(&state->info_raw);
+#ifdef LODEPNG_COMPILE_ENCODER
+  lodepng_encoder_settings_init(&state->encoder);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#ifdef LODEPNG_COMPILE_DECODER
+  lodepng_decoder_settings_init(&state->decoder);
+#endif /*LODEPNG_COMPILE_DECODER*/
+}
+
+void lodepng_state_cleanup(LodePNGState* state)
+{
+  /*clean up the two members that have a dedicated cleanup function*/
+  LodePNGInfo* png_info = &state->info_png;
+
+  lodepng_color_mode_cleanup(&state->info_raw);
+  lodepng_info_cleanup(png_info);
+}
+
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source)
+{
+  /*
+  Makes dest a copy of source. dest is cleaned up first, so it must already be
+  initialized. The outcome is stored in dest->error.
+  */
+  lodepng_state_cleanup(dest);
+
+  /*plain struct assignment for all fields, then info_raw and info_png are
+  re-initialized and copied again with their dedicated copy functions*/
+  *dest = *source;
+  lodepng_color_mode_init(&dest->info_raw);
+  lodepng_info_init(&dest->info_png);
+
+  dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw);
+  if(!dest->error)
+  {
+    dest->error = lodepng_info_copy(&dest->info_png, &source->info_png);
+  }
+}
+
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG Encoder                                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Appends one chunk to the output vector through lodepng_chunk_create.
+chunkName must be string of 4 characters; length is narrowed to unsigned.
+Returns 0 on success, otherwise the error code of lodepng_chunk_create.
+*/
+static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length)
+{
+  const unsigned chunk_length = (unsigned)length;
+  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, chunk_length, chunkName, data));
+  /*data and size were updated behind the vector's back: fix the allocsize again*/
+  out->allocsize = out->size;
+  return 0;
+}
+
+static void writeSignature(ucvector* out)
+{
+  /*8 bytes PNG signature, aka the magic bytes: 137 'P' 'N' 'G' 13 10 26 10*/
+  static const unsigned char signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+  size_t pos;
+
+  for(pos = 0; pos != sizeof(signature); ++pos) ucvector_push_back(out, signature[pos]);
+}
+
+static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
+                              LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method)
+{
+  /*
+  Builds the IHDR payload in a temporary vector (width and height through
+  lodepng_add32bitInt, then five single bytes) and appends it to out as a chunk.
+  Returns the error code of addChunk.
+  */
+  unsigned result;
+  ucvector ihdr;
+
+  ucvector_init(&ihdr);
+
+  lodepng_add32bitInt(&ihdr, w); /*width*/
+  lodepng_add32bitInt(&ihdr, h); /*height*/
+  ucvector_push_back(&ihdr, (unsigned char)bitdepth); /*bit depth*/
+  ucvector_push_back(&ihdr, (unsigned char)colortype); /*color type*/
+  ucvector_push_back(&ihdr, 0); /*compression method*/
+  ucvector_push_back(&ihdr, 0); /*filter method*/
+  ucvector_push_back(&ihdr, interlace_method); /*interlace method*/
+
+  result = addChunk(out, "IHDR", ihdr.data, ihdr.size);
+
+  ucvector_cleanup(&ihdr);
+  return result;
+}
+
+static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info)
+{
+  /*
+  Writes the palette as a PLTE chunk. The palette holds 4 bytes per entry; only
+  the first three of each entry are written, the fourth (alpha) is left out.
+  Returns the error code of addChunk.
+  */
+  unsigned result;
+  size_t entry;
+  ucvector plte;
+
+  ucvector_init(&plte);
+  for(entry = 0; entry != info->palettesize; ++entry)
+  {
+    const size_t base = entry * 4;
+    /*add all channels except alpha channel*/
+    ucvector_push_back(&plte, info->palette[base + 0]);
+    ucvector_push_back(&plte, info->palette[base + 1]);
+    ucvector_push_back(&plte, info->palette[base + 2]);
+  }
+
+  result = addChunk(out, "PLTE", plte.data, plte.size);
+  ucvector_cleanup(&plte);
+  return result;
+}
+
+static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info)
+{
+  /*
+  Writes a tRNS chunk. For palette images the payload is the alpha byte of each
+  palette entry; for grey and RGB images it is the color key as 16-bit big endian
+  values, and only when a key is defined. Other color types give an empty payload.
+  Returns the error code of addChunk.
+  */
+  unsigned result;
+  size_t idx;
+  ucvector payload;
+
+  ucvector_init(&payload);
+
+  switch(info->colortype)
+  {
+    case LCT_PALETTE:
+    {
+      size_t amount = info->palettesize;
+      /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
+      while(amount != 0 && info->palette[4 * (amount - 1) + 3] == 255) --amount;
+      /*add only alpha channel*/
+      for(idx = 0; idx != amount; ++idx) ucvector_push_back(&payload, info->palette[4 * idx + 3]);
+      break;
+    }
+    case LCT_GREY:
+      if(info->key_defined)
+      {
+        ucvector_push_back(&payload, (unsigned char)(info->key_r >> 8));
+        ucvector_push_back(&payload, (unsigned char)(info->key_r & 255));
+      }
+      break;
+    case LCT_RGB:
+      if(info->key_defined)
+      {
+        ucvector_push_back(&payload, (unsigned char)(info->key_r >> 8));
+        ucvector_push_back(&payload, (unsigned char)(info->key_r & 255));
+        ucvector_push_back(&payload, (unsigned char)(info->key_g >> 8));
+        ucvector_push_back(&payload, (unsigned char)(info->key_g & 255));
+        ucvector_push_back(&payload, (unsigned char)(info->key_b >> 8));
+        ucvector_push_back(&payload, (unsigned char)(info->key_b & 255));
+      }
+      break;
+    default:
+      break; /*nothing to add for the remaining color types*/
+  }
+
+  result = addChunk(out, "tRNS", payload.data, payload.size);
+  ucvector_cleanup(&payload);
+  return result;
+}
+
+static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
+                              LodePNGCompressSettings* zlibsettings)
+{
+  /*
+  Compresses datasize bytes of data with zlib_compress and appends the result to
+  out as a single IDAT chunk. Returns the first error code encountered.
+  */
+  unsigned result;
+  ucvector compressed;
+
+  ucvector_init(&compressed);
+
+  /*compress with the Zlib compressor*/
+  result = zlib_compress(&compressed.data, &compressed.size, data, datasize, zlibsettings);
+  if(result == 0) result = addChunk(out, "IDAT", compressed.data, compressed.size);
+
+  ucvector_cleanup(&compressed);
+  return result;
+}
+
+static unsigned addChunk_IEND(ucvector* out)
+{
+  /*the IEND chunk has no payload; returns the error code of addChunk*/
+  return addChunk(out, "IEND", 0, 0);
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring)
+{
+  /*
+  Writes a tEXt chunk: the keyword, a 0 separator, then the text without its
+  terminator. Returns 0 on success, 89 when the keyword is not 1 to 79 characters
+  long, otherwise the error code of addChunk.
+  */
+  unsigned error = 0;
+  size_t i;
+  ucvector text;
+  ucvector_init(&text);
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79)
+  {
+    /*the keyword bytes were already pushed, so release them before bailing out*/
+    ucvector_cleanup(&text);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&text, 0); /*0 termination char*/
+  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
+  error = addChunk(out, "tEXt", text.data, text.size);
+  ucvector_cleanup(&text);
+
+  return error;
+}
+
+static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
+                              LodePNGCompressSettings* zlibsettings)
+{
+  /*
+  Writes a zTXt chunk: the keyword, a 0 separator, a compression method byte of 0,
+  then the zlib-compressed text. Returns 0 on success, 89 when the keyword is not
+  1 to 79 characters long, otherwise the error code of zlib_compress or addChunk.
+  */
+  unsigned error = 0;
+  ucvector data, compressed;
+  size_t i, textsize = strlen(textstring);
+
+  ucvector_init(&data);
+  ucvector_init(&compressed);
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79)
+  {
+    /*the keyword bytes were already pushed, so release them before bailing out*/
+    ucvector_cleanup(&compressed);
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*0 termination char*/
+  ucvector_push_back(&data, 0); /*compression method: 0*/
+
+  error = zlib_compress(&compressed.data, &compressed.size,
+                        (unsigned char*)textstring, textsize, zlibsettings);
+  if(!error)
+  {
+    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
+    error = addChunk(out, "zTXt", data.data, data.size);
+  }
+
+  ucvector_cleanup(&compressed);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
+                              const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings)
+{
+  /*
+  Writes an iTXt chunk: keyword, 0 separator, compression flag, compression method,
+  0-terminated language tag, 0-terminated translated keyword, then the text, which
+  is zlib-compressed when "compressed" is nonzero and copied as is otherwise.
+  Returns 0 on success, 89 when the keyword is not 1 to 79 characters long,
+  otherwise the error code of zlib_compress or addChunk.
+  */
+  unsigned error = 0;
+  ucvector data;
+  size_t i, textsize = strlen(textstring);
+
+  ucvector_init(&data);
+
+  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
+  if(i < 1 || i > 79)
+  {
+    /*the keyword bytes were already pushed, so release them before bailing out*/
+    ucvector_cleanup(&data);
+    return 89; /*error: invalid keyword size*/
+  }
+  ucvector_push_back(&data, 0); /*null termination char*/
+  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
+  ucvector_push_back(&data, 0); /*compression method*/
+  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
+  ucvector_push_back(&data, 0); /*null termination char*/
+  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
+  ucvector_push_back(&data, 0); /*null termination char*/
+
+  if(compressed)
+  {
+    ucvector compressed_data;
+    ucvector_init(&compressed_data);
+    error = zlib_compress(&compressed_data.data, &compressed_data.size,
+                          (unsigned char*)textstring, textsize, zlibsettings);
+    if(!error)
+    {
+      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
+    }
+    ucvector_cleanup(&compressed_data);
+  }
+  else /*not compressed*/
+  {
+    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
+  }
+
+  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
+  ucvector_cleanup(&data);
+  return error;
+}
+
+static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info)
+{
+  /*
+  Writes a bKGD chunk. Grey color types store background_r as one 16-bit big
+  endian value, RGB color types store r, g and b that way, and palette images
+  store the low byte of background_r. Returns the error code of addChunk.
+  */
+  unsigned result;
+  ucvector payload;
+
+  ucvector_init(&payload);
+
+  switch(info->color.colortype)
+  {
+    case LCT_GREY:
+    case LCT_GREY_ALPHA:
+      ucvector_push_back(&payload, (unsigned char)(info->background_r >> 8));
+      ucvector_push_back(&payload, (unsigned char)(info->background_r & 255));
+      break;
+    case LCT_RGB:
+    case LCT_RGBA:
+      ucvector_push_back(&payload, (unsigned char)(info->background_r >> 8));
+      ucvector_push_back(&payload, (unsigned char)(info->background_r & 255));
+      ucvector_push_back(&payload, (unsigned char)(info->background_g >> 8));
+      ucvector_push_back(&payload, (unsigned char)(info->background_g & 255));
+      ucvector_push_back(&payload, (unsigned char)(info->background_b >> 8));
+      ucvector_push_back(&payload, (unsigned char)(info->background_b & 255));
+      break;
+    case LCT_PALETTE:
+      ucvector_push_back(&payload, (unsigned char)(info->background_r & 255)); /*palette index*/
+      break;
+    default:
+      break; /*any other color type gives an empty payload*/
+  }
+
+  result = addChunk(out, "bKGD", payload.data, payload.size);
+  ucvector_cleanup(&payload);
+  return result;
+}
+
+static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time)
+{
+  /*
+  Writes a tIME chunk of 7 bytes: the year as 16-bit big endian, then month, day,
+  hour, minute and second as one byte each. Returns 83 when the temporary buffer
+  cannot be allocated, otherwise the error code of addChunk.
+  */
+  unsigned result;
+  unsigned char* payload = (unsigned char*)lodepng_malloc(7);
+
+  if(payload)
+  {
+    payload[0] = (unsigned char)(time->year >> 8);
+    payload[1] = (unsigned char)(time->year & 255);
+    payload[2] = (unsigned char)time->month;
+    payload[3] = (unsigned char)time->day;
+    payload[4] = (unsigned char)time->hour;
+    payload[5] = (unsigned char)time->minute;
+    payload[6] = (unsigned char)time->second;
+    result = addChunk(out, "tIME", payload, 7);
+    lodepng_free(payload);
+  }
+  else
+  {
+    result = 83; /*alloc fail*/
+  }
+
+  return result;
+}
+
+static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info)
+{
+  /*
+  Writes a pHYs chunk: phys_x and phys_y through lodepng_add32bitInt, followed by
+  the phys_unit byte. Returns the error code of addChunk.
+  */
+  unsigned result;
+  ucvector payload;
+
+  ucvector_init(&payload);
+  lodepng_add32bitInt(&payload, info->phys_x);
+  lodepng_add32bitInt(&payload, info->phys_y);
+  ucvector_push_back(&payload, info->phys_unit);
+
+  result = addChunk(out, "pHYs", payload.data, payload.size);
+
+  ucvector_cleanup(&payload);
+  return result;
+}
+
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline,
+                           size_t length, size_t bytewidth, unsigned char filterType)
+{
+  /*
+  Applies one PNG filter type to a scanline of "length" bytes and writes the
+  filtered bytes to out. prevline is the unfiltered previous scanline, or 0 for
+  the first one, in which case its bytes count as 0. bytewidth is the distance in
+  bytes to the corresponding byte of the pixel on the left. An unknown filterType
+  leaves out untouched.
+  */
+  size_t pos;
+  switch(filterType)
+  {
+    case 0: /*None*/
+      for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos];
+      break;
+    case 1: /*Sub*/
+      /*the first pixel has no left neighbour*/
+      for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
+      for(pos = bytewidth; pos < length; ++pos) out[pos] = scanline[pos] - scanline[pos - bytewidth];
+      break;
+    case 2: /*Up*/
+      if(!prevline)
+      {
+        for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos];
+      }
+      else
+      {
+        for(pos = 0; pos != length; ++pos) out[pos] = scanline[pos] - prevline[pos];
+      }
+      break;
+    case 3: /*Average*/
+      if(!prevline)
+      {
+        for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
+        for(pos = bytewidth; pos < length; ++pos) out[pos] = scanline[pos] - (scanline[pos - bytewidth] >> 1);
+      }
+      else
+      {
+        for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos] - (prevline[pos] >> 1);
+        for(pos = bytewidth; pos < length; ++pos)
+        {
+          out[pos] = scanline[pos] - ((scanline[pos - bytewidth] + prevline[pos]) >> 1);
+        }
+      }
+      break;
+    case 4: /*Paeth*/
+      if(!prevline)
+      {
+        for(pos = 0; pos != bytewidth; ++pos) out[pos] = scanline[pos];
+        /*paethPredictor(scanline[pos - bytewidth], 0, 0) is always scanline[pos - bytewidth]*/
+        for(pos = bytewidth; pos < length; ++pos) out[pos] = (scanline[pos] - scanline[pos - bytewidth]);
+      }
+      else
+      {
+        /*paethPredictor(0, prevline[pos], 0) is always prevline[pos]*/
+        for(pos = 0; pos != bytewidth; ++pos) out[pos] = (scanline[pos] - prevline[pos]);
+        for(pos = bytewidth; pos < length; ++pos)
+        {
+          const unsigned char left = scanline[pos - bytewidth];
+          const unsigned char up = prevline[pos];
+          const unsigned char upleft = prevline[pos - bytewidth];
+          out[pos] = (scanline[pos] - paethPredictor(left, up, upleft));
+        }
+      }
+      break;
+    default: return; /*unexisting filter type given*/
+  }
+}
+
+/*
+log2 approximation. A slight bit faster than std::log.
+The argument is first scaled down (by 16 while above 32, then by 2 while above 2),
+counting the removed powers of two; the remainder goes through a cubic polynomial.
+*/
+static float flog2(float f)
+{
+  float exponent = 0;
+  for(; f > 32; f /= 16) exponent += 4;
+  for(; f > 2; f /= 2) ++exponent;
+  return exponent + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f);
+}
+
+static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
+                       const LodePNGColorMode* info, const LodePNGEncoderSettings* settings)
+{
+  /*
+  Applies PNG filter method 0 to the h scanlines of in, choosing filter types per settings->filter_strategy.
+  out must hold h * (1 + linebytes) bytes: each scanline is preceded by 1 extra filter type byte.
+  Returns 0 on success, 31 for an invalid color type, 83 on alloc fail, 88 for an unknown strategy.
+  */
+
+  unsigned bpp = lodepng_get_bpp(info);
+  /*the width of a scanline in bytes, not including the filter type*/
+  size_t linebytes = (w * bpp + 7) / 8;
+  /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
+  size_t bytewidth = (bpp + 7) / 8;
+  const unsigned char* prevline = 0;
+  unsigned x, y;
+  unsigned error = 0;
+  LodePNGFilterStrategy strategy = settings->filter_strategy;
+
+  /*
+  There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard:
+   *  If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e.
+      use fixed filtering, with the filter None).
+   * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is
+     not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply
+     all five filters and select the filter that produces the smallest sum of absolute values per row.
+  This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true.
+
+  If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed,
+  but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum
+  heuristic is used.
+  */
+  if(settings->filter_palette_zero &&
+     (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO;
+
+  if(bpp == 0) return 31; /*error: invalid color type*/
+
+  if(strategy == LFS_ZERO)
+  {
+    for(y = 0; y != h; ++y)
+    {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      out[outindex] = 0; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0);
+      prevline = &in[inindex];
+    }
+  }
+  else if(strategy == LFS_MINSUM)
+  {
+    /*adaptive filtering*/
+    size_t sum[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned char type, bestType = 0;
+
+    for(type = 0; type != 5; ++type)
+    {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) { while(type) lodepng_free(attempt[--type]); return 83; } /*alloc fail: free earlier rows*/
+    }
+
+    if(!error)
+    {
+      for(y = 0; y != h; ++y)
+      {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type)
+        {
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+
+          /*calculate the sum of the result*/
+          sum[type] = 0;
+          if(type == 0)
+          {
+            for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]);
+          }
+          else
+          {
+            for(x = 0; x != linebytes; ++x)
+            {
+              /*For differences, each byte should be treated as signed, values above 127 are negative
+              (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there.
+              This means filtertype 0 is almost never chosen, but that is justified.*/
+              unsigned char s = attempt[type][x];
+              sum[type] += s < 128 ? s : (255U - s);
+            }
+          }
+
+          /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum[type] < smallest)
+          {
+            bestType = type;
+            smallest = sum[type];
+          }
+        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+      }
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  }
+  else if(strategy == LFS_ENTROPY)
+  {
+    float sum[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    float smallest = 0;
+    unsigned type, bestType = 0;
+    unsigned count[256];
+
+    for(type = 0; type != 5; ++type)
+    {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) { while(type) lodepng_free(attempt[--type]); return 83; } /*alloc fail: free earlier rows*/
+    }
+
+    for(y = 0; y != h; ++y)
+    {
+      /*try the 5 filter types*/
+      for(type = 0; type != 5; ++type)
+      {
+        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+        for(x = 0; x != 256; ++x) count[x] = 0;
+        for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
+        ++count[type]; /*the filter type itself is part of the scanline*/
+        sum[type] = 0;
+        for(x = 0; x != 256; ++x)
+        {
+          float p = count[x] / (float)(linebytes + 1);
+          sum[type] += count[x] == 0 ? 0 : flog2(1 / p) * p;
+        }
+        /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
+        if(type == 0 || sum[type] < smallest)
+        {
+          bestType = type;
+          smallest = sum[type];
+        }
+      }
+
+      prevline = &in[y * linebytes];
+
+      /*now fill the out values*/
+      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+    }
+
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
+  }
+  else if(strategy == LFS_PREDEFINED)
+  {
+    for(y = 0; y != h; ++y)
+    {
+      size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/
+      size_t inindex = linebytes * y;
+      unsigned char type = settings->predefined_filters[y]; /*NOTE(review): assumes a non-null array of h entries*/
+      out[outindex] = type; /*filter type byte*/
+      filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type);
+      prevline = &in[inindex];
+    }
+  }
+  else if(strategy == LFS_BRUTE_FORCE)
+  {
+    /*brute force filter chooser.
+    deflate the scanline after every filter attempt to see which one deflates best.
+    This is very slow and gives only slightly smaller, sometimes even larger, result*/
+    size_t size[5];
+    unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/
+    size_t smallest = 0;
+    unsigned type = 0, bestType = 0;
+    unsigned char* dummy;
+    LodePNGCompressSettings zlibsettings = settings->zlibsettings;
+    /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
+    to simulate the true case where the tree is the same for the whole image. Sometimes it gives
+    better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
+    cases better compression. It does make this a bit less slow, so it's worth doing this.*/
+    zlibsettings.btype = 1;
+    /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG
+    images only, so disable it*/
+    zlibsettings.custom_zlib = 0;
+    zlibsettings.custom_deflate = 0;
+    for(type = 0; type != 5; ++type)
+    {
+      attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
+      if(!attempt[type]) { while(type) lodepng_free(attempt[--type]); return 83; } /*alloc fail: free earlier rows*/
+    }
+    for(y = 0; y != h; ++y) /*try the 5 filter types*/
+    {
+      for(type = 0; type != 5; ++type)
+      {
+        size_t testsize = linebytes;
+        /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
+
+        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+        size[type] = 0;
+        dummy = 0;
+        zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
+        lodepng_free(dummy);
+        /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
+        if(type == 0 || size[type] < smallest)
+        {
+          bestType = type;
+          smallest = size[type];
+        }
+      }
+      prevline = &in[y * linebytes];
+      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
+    }
+    for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); /*must pair with lodepng_malloc, not free*/
+  }
+  else return 88; /* unknown filter strategy */
+
+  return error;
+}
+
+static void addPaddingBits(unsigned char* out, const unsigned char* in,
+                           size_t olinebits, size_t ilinebits, unsigned h)
+{
+  /*Inverse of removePaddingBits: widens each of the h scanlines from ilinebits to olinebits bits.
+  olinebits must be >= ilinebits*/
+  size_t padbits = olinebits - ilinebits; /*filler bits appended to every scanline*/
+  size_t srcbit = 0, dstbit = 0; /*bit pointers*/
+  unsigned row;
+  for(row = 0; row != h; ++row)
+  {
+    size_t left;
+    for(left = ilinebits; left != 0; --left)
+    {
+      unsigned char bit = readBitFromReversedStream(&srcbit, in);
+      setBitOfReversedStream(&dstbit, out, bit);
+    }
+    /*the padding is written as explicit 0 bits rather than skipped, so that out contains no
+    uninitialised bits (which valgrind would warn about)*/
+    for(left = padbits; left != 0; --left) setBitOfReversedStream(&dstbit, out, 0);
+  }
+}
+
+/*
+Re-orders the pixels of a non-interlaced image into the 7 reduced images of PNG's Adam7 interlacing.
+in: non-interlaced image with size w*h. It has w * h * bpp bits and contains no padding bits at all:
+ not between scanlines, not between reduced images.
+out: the same pixels in Adam7 order, with no padding bits between scanlines, but with padding
+ bits between reduced images so that each reduced image starts at a byte. out is possibly
+ bigger than in due to those padding bits.
+bpp: bits per pixel
+w, h: dimensions of the full image in pixels
+NOTE: comments about padding bits are only relevant if bpp < 8
+*/
+static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp)
+{
+  unsigned passw[7], passh[7];
+  size_t filter_passstart[8], padded_passstart[8], passstart[8];
+  unsigned pass;
+
+  Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+  for(pass = 0; pass != 7; ++pass)
+  {
+    /*origin and step of this pass within the full image, and the size of its reduced image*/
+    unsigned x0 = ADAM7_IX[pass], dx = ADAM7_DX[pass];
+    unsigned y0 = ADAM7_IY[pass], dy = ADAM7_DY[pass];
+    unsigned pw = passw[pass], ph = passh[pass];
+    unsigned px, py, k;
+
+    if(bpp >= 8)
+    {
+      /*pixels are whole bytes: copy them bytewise*/
+      size_t bytewidth = bpp / 8;
+      for(py = 0; py < ph; ++py)
+      for(px = 0; px < pw; ++px)
+      {
+        size_t from = ((y0 + py * dy) * w + x0 + px * dx) * bytewidth;
+        size_t to = passstart[pass] + (py * pw + px) * bytewidth;
+        for(k = 0; k < bytewidth; ++k) out[to + k] = in[from + k];
+      }
+    }
+    else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/
+    {
+      /*row lengths in bits: ilinebits for the reduced image, olinebits for the full image*/
+      unsigned ilinebits = bpp * pw;
+      unsigned olinebits = bpp * w;
+      for(py = 0; py < ph; ++py)
+      for(px = 0; px < pw; ++px)
+      {
+        /*bit pointers (ibp into the in buffer, obp into the out buffer)*/
+        size_t ibp = (y0 + py * dy) * olinebits + (x0 + px * dx) * bpp;
+        size_t obp = (8 * passstart[pass]) + (py * ilinebits + px * bpp);
+        for(k = 0; k < bpp; ++k)
+        {
+          unsigned char bit = readBitFromReversedStream(&ibp, in);
+          setBitOfReversedStream(&obp, out, bit);
+        }
+      }
+    }
+  }
+}
+
+/*Turns the full image in into uncompressed IDAT chunk data. *out is allocated here (with lodepng_malloc)
+and receives *outsize bytes. return value is error*/
+static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                    unsigned w, unsigned h,
+                                    const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings)
+{
+  /*
+  This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. Steps:
+  *) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter
+  *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter
+  */
+  unsigned error = 0;
+  unsigned bpp = lodepng_get_bpp(&info_png->color);
+
+  if(info_png->interlace_method == 0)
+  {
+    unsigned rowbytes = (w * bpp + 7) / 8; /*size of one scanline once padded to a whole byte*/
+    int misaligned = bpp < 8 && w * bpp != rowbytes * 8; /*non multiple of 8 bits per scanline*/
+
+    *outsize = h + (h * rowbytes); /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out) && (*outsize)) error = 83; /*alloc fail*/
+
+    if(!error && misaligned)
+    {
+      /*padding bits needed per scanline: filter a byte aligned copy of the image*/
+      unsigned char* padded = (unsigned char*)lodepng_malloc(h * rowbytes);
+      if(padded)
+      {
+        addPaddingBits(padded, in, rowbytes * 8, w * bpp, h);
+        error = filter(*out, padded, w, h, &info_png->color, settings);
+      }
+      else error = 83; /*alloc fail*/
+      lodepng_free(padded);
+    }
+    else if(!error)
+    {
+      /*we can immediately filter into the out buffer, no other steps needed*/
+      error = filter(*out, in, w, h, &info_png->color, settings);
+    }
+  }
+  else /*interlace_method is 1 (Adam7)*/
+  {
+    unsigned passw[7], passh[7];
+    size_t filter_passstart[8], padded_passstart[8], passstart[8];
+    unsigned char* adam7;
+    unsigned pass;
+
+    Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp);
+
+    *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!(*out)) error = 83; /*alloc fail*/
+
+    adam7 = (unsigned char*)lodepng_malloc(passstart[7]);
+    if(!adam7 && passstart[7]) error = 83; /*alloc fail*/
+
+    if(!error)
+    {
+      Adam7_interlace(adam7, in, w, h, bpp);
+    }
+    /*pad (when bpp < 8) and filter each of the 7 reduced images, stopping at the first error*/
+    for(pass = 0; !error && pass != 7; ++pass)
+    {
+      unsigned char* dest = &(*out)[filter_passstart[pass]];
+      if(bpp < 8)
+      {
+        /*temporary byte aligned copy of this reduced image*/
+        unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[pass + 1] - padded_passstart[pass]);
+        if(!padded) ERROR_BREAK(83); /*alloc fail*/
+        addPaddingBits(padded, &adam7[passstart[pass]],
+                       ((passw[pass] * bpp + 7) / 8) * 8, passw[pass] * bpp, passh[pass]);
+        error = filter(dest, padded, passw[pass], passh[pass], &info_png->color, settings);
+        lodepng_free(padded);
+      }
+      else
+      {
+        /*no padding bits exist for bpp >= 8: filter straight from the interlaced buffer*/
+        error = filter(dest, &adam7[padded_passstart[pass]],
+                       passw[pass], passh[pass], &info_png->color, settings);
+      }
+    }
+
+    lodepng_free(adam7);
+  }
+
+  return error;
+}
+
+/*
+palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
+returns 0 if the palette is opaque,
+returns 1 if the only transparency is one RGB value with alpha 0 that no opaque entry uses ==> color key
+returns 2 if the palette is semi-translucent, or its transparency cannot be expressed as one color key.
+*/
+static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize)
+{
+  size_t i;
+  unsigned key = 0;
+  unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/
+  for(i = 0; i != palettesize; ++i)
+  {
+    const unsigned char* c = &palette[4 * i]; /*the RGBA bytes of entry i*/
+    if(c[3] != 0 && c[3] != 255) return 2; /*partially transparent entry*/
+    if(!key && c[3] == 0)
+    {
+      r = c[0]; g = c[1]; b = c[2];
+      key = 1;
+      i = (size_t)(-1); /*restart from beginning, to detect earlier opaque colors with key's value*/
+    }
+    else if(key && (c[3] == 0) != (r == c[0] && g == c[1] && b == c[2])) return 2; /*alpha 0 <=> key RGB*/
+  }
+  return key;
+}
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize)
+{
+  /*data holds datasize bytes of chunks stored back to back; append them to out one by one*/
+  unsigned char* chunk;
+  for(chunk = data; (size_t)(chunk - data) < datasize; chunk = lodepng_chunk_next(chunk))
+  {
+    CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, chunk));
+    out->allocsize = out->size; /*fix the allocsize again*/
+  }
+  return 0;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+                        const unsigned char* image, unsigned w, unsigned h,
+                        LodePNGState* state)
+{
+  LodePNGInfo info;
+  ucvector outv;
+  unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
+  size_t datasize = 0;
+
+  /*Encodes the raw w x h image into a PNG handed back through out/outsize; returns state->error. Outputs start empty.*/
+  *out = 0;
+  *outsize = 0;
+  state->error = 0;
+
+  lodepng_info_init(&info);
+  lodepng_info_copy(&info, &state->info_png);
+
+  if((info.color.colortype == LCT_PALETTE || state->encoder.force_palette)
+      && (info.color.palettesize == 0 || info.color.palettesize > 256))
+  {
+    lodepng_info_cleanup(&info); /*release the copy made above before bailing out*/
+    CERROR_RETURN_ERROR(state->error, 68); /*invalid palette size, it is only allowed to be 1-256*/
+  }
+
+  if(state->encoder.auto_convert)
+  {
+    state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw);
+  }
+
+  /*validate the settings and both color modes. The first failure wins, and every failure leaves
+  through the single exit below so that the info copy is not leaked*/
+  if(!state->error)
+  {
+    if(state->encoder.zlibsettings.btype > 2) state->error = 61; /*error: unexisting btype*/
+    else if(state->info_png.interlace_method > 1) state->error = 71; /*error: unexisting interlace mode*/
+    else state->error = checkColorValidity(info.color.colortype, info.color.bitdepth);
+  }
+  if(!state->error) state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
+  if(state->error) /*includes error: unexisting color type given*/
+  {
+    lodepng_info_cleanup(&info);
+    return state->error;
+  }
+
+  if(!lodepng_color_mode_equal(&state->info_raw, &info.color))
+  {
+    unsigned char* converted;
+    size_t size = (w * h * lodepng_get_bpp(&info.color) + 7) / 8;
+
+    converted = (unsigned char*)lodepng_malloc(size);
+    if(!converted && size) state->error = 83; /*alloc fail*/
+    if(!state->error)
+    {
+      state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
+    }
+    if(!state->error) state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
+    lodepng_free(converted);
+  }
+  else state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
+
+  ucvector_init(&outv);
+  while(!state->error) /*while only executed once, to break on error*/
+  {
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    size_t i;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*write signature and chunks*/
+    writeSignature(&outv);
+    /*IHDR*/
+    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*unknown chunks between IHDR and PLTE*/
+    if(info.unknown_chunks_data[0])
+    {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]);
+      if(state->error) break;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*PLTE*/
+    if(info.color.colortype == LCT_PALETTE)
+    {
+      addChunk_PLTE(&outv, &info.color);
+    }
+    if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA))
+    {
+      addChunk_PLTE(&outv, &info.color);
+    }
+    /*tRNS*/
+    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0)
+    {
+      addChunk_tRNS(&outv, &info.color);
+    }
+    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined)
+    {
+      addChunk_tRNS(&outv, &info.color);
+    }
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*bKGD (must come between PLTE and the IDAt chunks*/
+    if(info.background_defined) addChunk_bKGD(&outv, &info);
+    /*pHYs (must come before the IDAT chunks)*/
+    if(info.phys_defined) addChunk_pHYs(&outv, &info);
+
+    /*unknown chunks between PLTE and IDAT*/
+    if(info.unknown_chunks_data[1])
+    {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]);
+      if(state->error) break;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    /*IDAT (multiple IDAT chunks must be consecutive)*/
+    state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings);
+    if(state->error) break;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+    /*tIME*/
+    if(info.time_defined) addChunk_tIME(&outv, &info.time);
+    /*tEXt and/or zTXt*/
+    for(i = 0; i != info.text_num; ++i)
+    {
+      if(strlen(info.text_keys[i]) > 79)
+      {
+        state->error = 66; /*text chunk too large*/
+        break;
+      }
+      if(strlen(info.text_keys[i]) < 1)
+      {
+        state->error = 67; /*text chunk too small*/
+        break;
+      }
+      if(state->encoder.text_compression)
+      {
+        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
+      }
+      else
+      {
+        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
+      }
+    }
+    if(state->error) break; /*the break above only left the for loop; stop before the error can be overwritten*/
+    if(state->encoder.add_id) /*LodePNG version id in text chunk*/
+    {
+      unsigned alread_added_id_text = 0;
+      for(i = 0; i != info.text_num; ++i)
+      {
+        if(!strcmp(info.text_keys[i], "LodePNG"))
+        {
+          alread_added_id_text = 1;
+          break;
+        }
+      }
+      if(alread_added_id_text == 0)
+      {
+        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
+      }
+    }
+    /*iTXt*/
+    for(i = 0; i != info.itext_num; ++i)
+    {
+      if(strlen(info.itext_keys[i]) > 79)
+      {
+        state->error = 66; /*text chunk too large*/
+        break;
+      }
+      if(strlen(info.itext_keys[i]) < 1)
+      {
+        state->error = 67; /*text chunk too small*/
+        break;
+      }
+      addChunk_iTXt(&outv, state->encoder.text_compression,
+                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
+                    &state->encoder.zlibsettings);
+    }
+    if(state->error) break; /*same as above: keep error 66/67 from being overwritten below*/
+    /*unknown chunks between IDAT and IEND*/
+    if(info.unknown_chunks_data[2])
+    {
+      state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]);
+      if(state->error) break;
+    }
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+    addChunk_IEND(&outv);
+
+    break; /*this isn't really a while loop; no error happened so break out now!*/
+  }
+
+  lodepng_info_cleanup(&info);
+  lodepng_free(data);
+  /*instead of cleaning the vector up, give it to the output*/
+  *out = outv.data;
+  *outsize = outv.size;
+
+  return state->error;
+}
+
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image,
+                               unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth)
+{
+  /*Convenience wrapper: encodes with a freshly initialized state in which the raw input and the
+  requested PNG both use the given color type and bit depth.*/
+  LodePNGState state;
+  unsigned result;
+  lodepng_state_init(&state);
+  state.info_png.color.colortype = state.info_raw.colortype = colortype;
+  state.info_png.color.bitdepth = state.info_raw.bitdepth = bitdepth;
+  lodepng_encode(out, outsize, image, w, h, &state);
+  result = state.error; /*read the error before the state is cleaned up*/
+  lodepng_state_cleanup(&state);
+  return result;
+}
+
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
+{
+  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8); /*shortcut: color type LCT_RGBA, bit depth 8*/
+}
+
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h)
+{
+  return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8); /*shortcut: color type LCT_RGB, bit depth 8*/
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth)
+{
+  unsigned char* png; /*the image is encoded to this memory buffer first, then written to filename*/
+  size_t pngsize;
+  unsigned result = lodepng_encode_memory(&png, &pngsize, image, w, h, colortype, bitdepth);
+  if(result == 0) result = lodepng_save_file(png, pngsize, filename);
+  lodepng_free(png);
+  return result;
+}
+
+unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
+{
+  return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8); /*shortcut: color type LCT_RGBA, bit depth 8*/
+}
+
+unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h)
+{
+  return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8); /*shortcut: color type LCT_RGB, bit depth 8*/
+}
+#endif /*LODEPNG_COMPILE_DISK*/
+
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings)
+{
+  settings->auto_convert = 1; /*let the encoder choose the PNG color mode*/
+  settings->force_palette = 0;
+  settings->filter_strategy = LFS_MINSUM;
+  settings->filter_palette_zero = 1; /*filter type 0 for palette images and bit depths below 8*/
+  settings->predefined_filters = 0; /*null: only read when the strategy is LFS_PREDEFINED*/
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  settings->text_compression = 1;
+  settings->add_id = 0;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+  lodepng_compress_settings_init(&settings->zlibsettings);
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*
+Returns the English description of a numerical error code. The table below is also
+the documentation of all the error codes.
+*/
+const char* lodepng_error_text(unsigned code)
+{
+  size_t i;
+  static const struct { unsigned code; const char* text; } messages[] = {
+    {0, "no error, everything went ok"},
+    {1, "nothing done yet"}, /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/
+    {10, "end of input memory reached without huffman end code"}, /*while huffman decoding*/
+    {11, "error in code tree made it jump outside of huffman tree"}, /*while huffman decoding*/
+    {13, "problem while processing dynamic deflate block"},
+    {14, "problem while processing dynamic deflate block"},
+    {15, "problem while processing dynamic deflate block"},
+    {16, "unexisting code while processing dynamic deflate block"},
+    {17, "end of out buffer memory reached while inflating"},
+    {18, "invalid distance code while inflating"},
+    {19, "end of out buffer memory reached while inflating"},
+    {20, "invalid deflate block BTYPE encountered while decoding"},
+    {21, "NLEN is not ones complement of LEN in a deflate block"},
+    /*end of out buffer memory reached while inflating: this can happen if the inflated deflate data is
+    longer than the amount of bytes required to fill up all the pixels of the image, given the color
+    depth and image dimensions. Something that doesn't happen in a normal, well encoded, PNG image.*/
+    {22, "end of out buffer memory reached while inflating"},
+    {23, "end of in buffer memory reached while inflating"},
+    {24, "invalid FCHECK in zlib header"},
+    {25, "invalid compression method in zlib header"},
+    {26, "FDICT encountered in zlib header while it's not used for PNG"},
+    {27, "PNG file is smaller than a PNG header"},
+    /*Checks the magic file header, the first 8 bytes of the PNG file*/
+    {28, "incorrect PNG signature, it's no PNG or corrupted"},
+    {29, "first chunk is not the header chunk"},
+    {30, "chunk length too large, chunk broken off at end of file"},
+    {31, "illegal PNG color type or bpp"},
+    {32, "illegal PNG compression method"},
+    {33, "illegal PNG filter method"},
+    {34, "illegal PNG interlace method"},
+    {35, "chunk length of a chunk is too large or the chunk too small"},
+    {36, "illegal PNG filter type encountered"},
+    {37, "illegal bit depth for this color type given"},
+    {38, "the palette is too big"}, /*more than 256 colors*/
+    {39, "more palette alpha values given in tRNS chunk than there are colors in the palette"},
+    {40, "tRNS chunk has wrong size for greyscale image"},
+    {41, "tRNS chunk has wrong size for RGB image"},
+    {42, "tRNS chunk appeared while it was not allowed for this color type"},
+    {43, "bKGD chunk has wrong size for palette image"},
+    {44, "bKGD chunk has wrong size for greyscale image"},
+    {45, "bKGD chunk has wrong size for RGB image"},
+    {48, "empty input buffer given to decoder. Maybe caused by non-existing file?"},
+    {49, "jumped past memory while generating dynamic huffman tree"},
+    {50, "jumped past memory while generating dynamic huffman tree"},
+    {51, "jumped past memory while inflating huffman block"},
+    {52, "jumped past memory while inflating"},
+    {53, "size of zlib data too small"},
+    {54, "repeat symbol in tree while there was no value symbol yet"},
+    /*jumped past tree while generating huffman tree: the tree would have more leaves than symbols after
+    generating it out of the given lengths. zlib calls this an oversubscribed dynamic bit lengths tree.*/
+    {55, "jumped past tree while generating huffman tree"},
+    {56, "given output image colortype or bitdepth not supported for color conversion"},
+    {57, "invalid CRC encountered (checking CRC can be disabled)"},
+    {58, "invalid ADLER32 encountered (checking ADLER32 can be disabled)"},
+    {59, "requested color conversion not supported"},
+    {60, "invalid window size given in the settings of the encoder (must be 0-32768)"},
+    {61, "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)"},
+    /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/
+    {62, "conversion from color to greyscale not supported"},
+    {63, "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"}, /*(2^31-1)*/
+    /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/
+    {64, "the length of the END symbol 256 in the Huffman tree is 0"},
+    {66, "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes"},
+    {67, "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte"},
+    {68, "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors"},
+    {69, "unknown chunk type with 'critical' flag encountered by the decoder"},
+    {71, "unexisting interlace mode given to encoder (must be 0 or 1)"},
+    {72, "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)"},
+    {73, "invalid tIME chunk size"},
+    {74, "invalid pHYs chunk size"},
+    /*length could be wrong, or data chopped off*/
+    {75, "no null termination char found while decoding text chunk"},
+    {76, "iTXt chunk too short to contain required bytes"},
+    {77, "integer overflow in buffer size"},
+    {78, "failed to open file for reading"}, /*file doesn't exist or couldn't be opened for reading*/
+    {79, "failed to open file for writing"},
+    {80, "tried creating a tree of 0 symbols"},
+    {81, "lazy matching at pos 0 is impossible"},
+    {82, "color conversion to palette requested while a color isn't in palette"},
+    {83, "memory allocation failed"},
+    {84, "given image too small to contain all pixels to be encoded"},
+    {86, "impossible offset in lz77 encoding (internal bug)"},
+    {87, "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined"},
+    {88, "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy"},
+    {89, "text chunk keyword too short or long: must have size 1-79"},
+    /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/
+    {90, "windowsize must be a power of two"},
+    {91, "invalid decompressed idat size"},
+    {92, "too many pixels, not supported"},
+    {93, "zero width or height is invalid"},
+    {94, "header chunk must have a size of 13 bytes"}
+  };
+  for(i = 0; i != sizeof(messages) / sizeof(messages[0]); ++i)
+    if(messages[i].code == code) return messages[i].text;
+  return "unknown error code";
+}
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // C++ Wrapper                                                          // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng
+{
+
+#ifdef LODEPNG_COMPILE_DISK
+/*Reads the whole file into buffer, resizing it to the file size. Returns 0 if OK, 78 if not.*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename)
+{
+  std::ifstream file(filename.c_str(), std::ios::in|std::ios::binary|std::ios::ate);
+  if(!file) return 78;
+  /*get filesize; tellg() reports failure as -1, which must never reach resize()*/
+  std::streamsize size = 0;
+  if(file.seekg(0, std::ios::end).good()) size = file.tellg();
+  if(file.seekg(0, std::ios::beg).good()) size -= file.tellg();
+  if(size < 0) return 78;
+
+  /*read contents of the file into the vector; a failed or short read is an error too*/
+  buffer.resize(size_t(size));
+  if(size > 0 && !file.read((char*)(&buffer[0]), size)) return 78;
+  return 0; /* OK */
+}
+
+/*write given buffer to the file, overwriting the file, it doesn't append to it. Returns 0 if OK, 79 if not.*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename)
+{
+  std::ofstream file(filename.c_str(), std::ios::out|std::ios::binary);
+  if(!file) return 79;
+  file.write(buffer.empty() ? 0 : (const char*)&buffer[0], std::streamsize(buffer.size()));
+  return file.flush().good() ? 0 : 79; /*flush now so a failed write (e.g. full disk) is reported*/
+}
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/*Appends the bytes produced by zlib_decompress to out and returns its error code.*/
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings)
+{
+  unsigned char* inflated = 0;
+  size_t inflatedsize = 0;
+  const unsigned error = zlib_decompress(&inflated, &inflatedsize, in, insize, &settings);
+  if(!inflated) return error; /*no buffer came back: nothing to append or free*/
+  out.insert(out.end(), inflated, inflated + inflatedsize); /*appended even if error is nonzero*/
+  lodepng_free(inflated);
+  return error;
+}
+
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings)
+{
+  const unsigned char* data = in.empty() ? 0 : &in[0]; /*&in[0] is not valid for an empty vector*/
+  return decompress(out, data, in.size(), settings);
+}
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Appends the bytes produced by zlib_compress to out and returns its error code.*/
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings)
+{
+  unsigned char* deflated = 0;
+  size_t deflatedsize = 0;
+  const unsigned error = zlib_compress(&deflated, &deflatedsize, in, insize, &settings);
+  if(!deflated) return error; /*no buffer came back: nothing to append or free*/
+  out.insert(out.end(), deflated, deflated + deflatedsize); /*appended even if error is nonzero*/
+  lodepng_free(deflated);
+  return error;
+}
+
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings)
+{
+  const unsigned char* data = in.empty() ? 0 : &in[0]; /*&in[0] is not valid for an empty vector*/
+  return compress(out, data, in.size(), settings);
+}
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+
+
+#ifdef LODEPNG_COMPILE_PNG
+
+State::State()
+{
+  lodepng_state_init(this);
+}
+
+State::State(const State& other)
+{
+  lodepng_state_init(this); /*initialize first, so that lodepng_state_copy is handed an initialized destination*/
+  lodepng_state_copy(this, &other);
+}
+
+State::~State()
+{
+  lodepng_state_cleanup(this);
+}
+
+State& State::operator=(const State& other)
+{
+  if(this != &other) lodepng_state_copy(this, &other); /*self-assignment must leave the state untouched*/
+  return *this;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
+                size_t insize, LodePNGColorType colortype, unsigned bitdepth)
+{
+  unsigned char* buffer = 0; /*stays null if the decoder returns without producing an image*/
+  unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
+  if(buffer && !error)
+  {
+    State state;
+    state.info_raw.colortype = colortype;
+    state.info_raw.bitdepth = bitdepth;
+    size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+  }
+  lodepng_free(buffer); /*released on every path, so a buffer returned together with an error is not leaked*/
+  return error;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in, LodePNGColorType colortype, unsigned bitdepth)
+{
+  return decode(out, w, h, in.empty() ? 0 : &in[0], in.size(), colortype, bitdepth); /*size_t, not narrowed*/
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize)
+{
+  unsigned char* pixels = NULL;
+  const unsigned error = lodepng_decode(&pixels, &w, &h, &state, in, insize);
+  if(!error && pixels) /*only a successful decode is appended to out*/
+  {
+    const size_t numbytes = lodepng_get_raw_size(w, h, &state.info_raw);
+    out.insert(out.end(), pixels, pixels + numbytes);
+  }
+  lodepng_free(pixels);
+  return error;
+}
+
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state, const std::vector<unsigned char>& in)
+{
+  const unsigned char* data = in.empty() ? 0 : &in[0]; /*&in[0] is not valid for an empty vector*/
+  return decode(out, w, h, state, data, in.size());
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const std::string& filename,
+                LodePNGColorType colortype, unsigned bitdepth)
+{
+  /*load the whole file, then hand its bytes to the in-memory overload*/
+  std::vector<unsigned char> png;
+  const unsigned error = load_file(png, filename);
+  return error ? error : decode(out, w, h, png, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+unsigned encode(std::vector<unsigned char>& out, const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth)
+{
+  unsigned char* buffer = 0; /*initialized like in the sibling wrappers: the null test below must never*/
+  size_t buffersize = 0;     /*read an indeterminate pointer if the encoder returns without writing it*/
+  unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth);
+  if(buffer)
+  {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth)
+{
+  const bool too_small = lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size(); /*error 84*/
+  return too_small ? 84 : encode(out, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state)
+{
+  unsigned char* buffer = 0; /*initialized like in the sibling wrappers: the null test below must never*/
+  size_t buffersize = 0;     /*read an indeterminate pointer if the encoder returns without writing it*/
+  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
+  if(buffer)
+  {
+    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
+    lodepng_free(buffer);
+  }
+  return error;
+}
+
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state)
+{
+  const bool too_small = lodepng_get_raw_size(w, h, &state.info_raw) > in.size(); /*error 84*/
+  return too_small ? 84 : encode(out, in.empty() ? 0 : &in[0], w, h, state);
+}
+
+#ifdef LODEPNG_COMPILE_DISK
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth)
+{
+  /*encode to memory first; the file is only written when encoding succeeded*/
+  std::vector<unsigned char> png;
+  const unsigned error = encode(png, in, w, h, colortype, bitdepth);
+  return error ? error : save_file(png, filename);
+}
+
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype, unsigned bitdepth)
+{
+  const bool too_small = lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size(); /*error 84*/
+  return too_small ? 84 : encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
+}
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_PNG */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
diff --git a/third_party/lodepng/lodepng.h b/third_party/lodepng/lodepng.h
new file mode 100644
index 0000000..f121ab3
--- /dev/null
+++ b/third_party/lodepng/lodepng.h
@@ -0,0 +1,1756 @@
+/*
+LodePNG version 20160124
+
+Copyright (c) 2005-2016 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+#ifndef LODEPNG_H
+#define LODEPNG_H
+
+#include <string.h> /*for size_t*/
+
+extern const char* LODEPNG_VERSION_STRING;
+
+/*
+The following #defines are used to create code sections. They can be disabled
+to disable code sections, which can give faster compile time and smaller binary.
+The "NO_COMPILE" defines are designed to be used to pass as defines to the
+compiler command to disable them without modifying this header, e.g.
+-DLODEPNG_NO_COMPILE_ZLIB for gcc.
+In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to
+allow implementing a custom lodepng_crc32.
+*/
+/*deflate & zlib. If disabled, you must specify alternative zlib functions in
+the custom_zlib field of the compress and decompress settings*/
+#ifndef LODEPNG_NO_COMPILE_ZLIB
+#define LODEPNG_COMPILE_ZLIB
+#endif
+/*png encoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_PNG
+#define LODEPNG_COMPILE_PNG
+#endif
+/*deflate&zlib decoder and png decoder*/
+#ifndef LODEPNG_NO_COMPILE_DECODER
+#define LODEPNG_COMPILE_DECODER
+#endif
+/*deflate&zlib encoder and png encoder*/
+#ifndef LODEPNG_NO_COMPILE_ENCODER
+#define LODEPNG_COMPILE_ENCODER
+#endif
+/*the optional built in harddisk file loading and saving functions*/
+#ifndef LODEPNG_NO_COMPILE_DISK
+#define LODEPNG_COMPILE_DISK
+#endif
+/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/
+#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS
+#define LODEPNG_COMPILE_ANCILLARY_CHUNKS
+#endif
+/*ability to convert error numerical codes to English text string*/
+#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT
+#define LODEPNG_COMPILE_ERROR_TEXT
+#endif
+/*Compile the default allocators (C's free, malloc and realloc). If you disable this,
+you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your
+source files with custom allocators.*/
+#ifndef LODEPNG_NO_COMPILE_ALLOCATORS
+#define LODEPNG_COMPILE_ALLOCATORS
+#endif
+/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/
+#ifdef __cplusplus
+#ifndef LODEPNG_NO_COMPILE_CPP
+#define LODEPNG_COMPILE_CPP
+#endif
+#endif
+
+#ifdef LODEPNG_COMPILE_CPP
+#include <vector>
+#include <string>
+#endif /*LODEPNG_COMPILE_CPP*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*The PNG color types (also used for raw images), each listed with the bit depths it allows.*/
+typedef enum LodePNGColorType
+{
+  LCT_GREY = 0, /*greyscale: bit depth 1, 2, 4, 8 or 16*/
+  LCT_RGB = 2, /*RGB: bit depth 8 or 16*/
+  LCT_PALETTE = 3, /*palette: bit depth 1, 2, 4 or 8*/
+  LCT_GREY_ALPHA = 4, /*greyscale with alpha: bit depth 8 or 16*/
+  LCT_RGBA = 6 /*RGB with alpha: bit depth 8 or 16*/
+} LodePNGColorType;
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Converts PNG data in memory to raw pixel data.
+out: Output parameter. Pointer to buffer that will contain the raw pixel data.
+     After decoding, its size is w * h * (bytes per pixel) bytes larger than
+     initially. Bytes per pixel depends on colortype and bitdepth.
+     Must be freed after usage with free(*out).
+     Note: for 16-bit per channel colors, uses big endian format like PNG does.
+w: Output parameter. Pointer to width of pixel data.
+h: Output parameter. Pointer to height of pixel data.
+in: Memory buffer with the PNG file.
+insize: size of the in buffer.
+colortype: the desired color type for the raw output image. See explanation on PNG color types.
+bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h,
+                               const unsigned char* in, size_t insize,
+                               LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/
+unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h,
+                          const unsigned char* in, size_t insize);
+
+/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/
+unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h,
+                          const unsigned char* in, size_t insize);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load PNG from disk, from file with given name.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h,
+                             const char* filename,
+                             LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/
+unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h,
+                               const char* filename);
+
+/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/
+unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h,
+                               const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Converts raw pixel data into a PNG image in memory. The colortype and bitdepth
+  of the output PNG image cannot be chosen, they are automatically determined
+  by the colortype, bitdepth and content of the input pixel data.
+  Note: for 16-bit per channel colors, needs big endian format like PNG does.
+out: Output parameter. Pointer to buffer that will contain the PNG image data.
+     Must be freed after usage with free(*out).
+outsize: Output parameter. Pointer to the size in bytes of the out buffer.
+image: The raw pixel data to encode. The size of this buffer should be
+       w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth.
+w: width of the raw pixel data in pixels.
+h: height of the raw pixel data in pixels.
+colortype: the color type of the raw input image. See explanation on PNG color types.
+bitdepth: the bit depth of the raw input image. See explanation on PNG color types.
+Return value: LodePNG error code (0 means no error).
+*/
+unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize,
+                               const unsigned char* image, unsigned w, unsigned h,
+                               LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32(unsigned char** out, size_t* outsize,
+                          const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24(unsigned char** out, size_t* outsize,
+                          const unsigned char* image, unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned lodepng_encode_file(const char* filename,
+                             const unsigned char* image, unsigned w, unsigned h,
+                             LodePNGColorType colortype, unsigned bitdepth);
+
+/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/
+unsigned lodepng_encode32_file(const char* filename,
+                               const unsigned char* image, unsigned w, unsigned h);
+
+/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/
+unsigned lodepng_encode24_file(const char* filename,
+                               const unsigned char* image, unsigned w, unsigned h);
+#endif /*LODEPNG_COMPILE_DISK*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#ifdef LODEPNG_COMPILE_CPP
+namespace lodepng
+{
+#ifdef LODEPNG_COMPILE_DECODER
+/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype
+is the format to output the pixels to. Default is RGBA 8-bit per channel.*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const unsigned char* in, size_t insize,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::vector<unsigned char>& in,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts PNG file from disk to raw pixel data in memory.
+Same as the other decode functions, but instead takes a filename as input.
+*/
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                const std::string& filename,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype
+is that of the raw input data. The output PNG color type will be auto chosen.*/
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Converts raw pixel data (32-bit RGBA by default) into a PNG file on disk.
+Same as the other encode functions, but instead takes a filename as output.
+NOTE: This overwrites existing files without warning!
+*/
+unsigned encode(const std::string& filename,
+                const unsigned char* in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+unsigned encode(const std::string& filename,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_ENCODER */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ERROR_TEXT
+/*Returns an English description of the numerical error code.*/
+const char* lodepng_error_text(unsigned code);
+#endif /*LODEPNG_COMPILE_ERROR_TEXT*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Settings for zlib decompression*/
+typedef struct LodePNGDecompressSettings LodePNGDecompressSettings;
+struct LodePNGDecompressSettings
+{
+  unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
+
+  /*use custom zlib decoder instead of built in one (default: null)*/
+  unsigned (*custom_zlib)(unsigned char**, size_t*,
+                          const unsigned char*, size_t,
+                          const LodePNGDecompressSettings*);
+  /*use custom inflate (deflate decoder) instead of built in one (default: null).
+  If custom_zlib is used, custom_inflate is ignored since only the built in
+  zlib function will call custom_inflate*/
+  unsigned (*custom_inflate)(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGDecompressSettings*);
+
+  const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGDecompressSettings lodepng_default_decompress_settings;
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Settings for zlib compression. Tweaking these settings tweaks the balance
+between speed and compression ratio.
+*/
+typedef struct LodePNGCompressSettings LodePNGCompressSettings;
+struct LodePNGCompressSettings /*deflate = compress*/
+{
+  /*LZ77 related settings*/
+  unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/
+  unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/
+  unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/
+  unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/
+  unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/
+  unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/
+
+  /*use custom zlib encoder instead of built in one (default: null)*/
+  unsigned (*custom_zlib)(unsigned char**, size_t*,
+                          const unsigned char*, size_t,
+                          const LodePNGCompressSettings*);
+  /*use custom deflate encoder instead of built in one (default: null).
+  If custom_zlib is used, custom_deflate is ignored since only the built in
+  zlib function will call custom_deflate*/
+  unsigned (*custom_deflate)(unsigned char**, size_t*,
+                             const unsigned char*, size_t,
+                             const LodePNGCompressSettings*);
+
+  const void* custom_context; /*optional custom settings for custom functions*/
+};
+
+extern const LodePNGCompressSettings lodepng_default_compress_settings;
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_PNG
+/*
+Color mode of an image. Contains all information required to decode the pixel
+bits to RGBA colors. This information is the same as used in the PNG file
+format, and is used both for PNG and raw image data in LodePNG.
+*/
+typedef struct LodePNGColorMode
+{
+  /*header (IHDR)*/
+  LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/
+  unsigned bitdepth;  /*bits per sample, see PNG standard or documentation further in this header file*/
+
+  /*
+  palette (PLTE and tRNS)
+
+  Dynamically allocated with the colors of the palette, including alpha.
+  When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use
+  lodepng_palette_clear, then for each color use lodepng_palette_add.
+  If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette.
+
+  When decoding, by default you can ignore this palette, since LodePNG already
+  fills the palette colors in the pixels of the raw RGBA output.
+
+  The palette is only supported for color type 3.
+  */
+  unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0, or point to 1024 bytes (256 colors)*/
+  size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/
+
+  /*
+  transparent color key (tRNS)
+
+  This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit.
+  For greyscale PNGs, r, g and b will all 3 be set to the same.
+
+  When decoding, by default you can ignore this information, since LodePNG sets
+  pixels with this key to transparent already in the raw RGBA output.
+
+  The color key is only supported for color types 0 and 2.
+  */
+  unsigned key_defined; /*is a transparent color key given? 0 = false, 1 = true*/
+  unsigned key_r;       /*red/greyscale component of color key*/
+  unsigned key_g;       /*green component of color key*/
+  unsigned key_b;       /*blue component of color key*/
+} LodePNGColorMode;
+
+/*init, cleanup and copy functions to use with this struct*/
+void lodepng_color_mode_init(LodePNGColorMode* info);
+void lodepng_color_mode_cleanup(LodePNGColorMode* info);
+/*return value is error code (0 means no error)*/
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source);
+
+void lodepng_palette_clear(LodePNGColorMode* info);
+/*add 1 color to the palette*/
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a);
+
+/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info);
+/*get the amount of color channels used, based on colortype in the struct.
+If a palette is used, it counts as 1 channel.*/
+unsigned lodepng_get_channels(const LodePNGColorMode* info);
+/*is it a greyscale type? (only colortype 0 or 4)*/
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info);
+/*has it got an alpha channel? (only colortype 4 or 6)*/
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info);
+/*has it got a palette? (only colortype 3)*/
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info);
+/*only returns true if there is a palette and there is a value in the palette with alpha < 255.
+Loops through the palette to check this.*/
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info);
+/*
+Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image.
+Returns true if the image can have translucent or invisible pixels (it can still be opaque if it doesn't use such pixels).
+Returns false if the image can only have opaque pixels.
+In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values,
+or if "key_defined" is true.
+*/
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info);
+/*Returns the byte size of a raw image buffer with given width, height and color mode*/
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+/*The fields of a PNG time chunk (tIME).*/
+typedef struct LodePNGTime
+{
+  unsigned year;    /*2 bytes used (0-65535)*/
+  unsigned month;   /*1-12*/
+  unsigned day;     /*1-31*/
+  unsigned hour;    /*0-23*/
+  unsigned minute;  /*0-59*/
+  unsigned second;  /*0-60 (to allow for leap seconds)*/
+} LodePNGTime;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*Information about the PNG image, except pixels, width and height.*/
+typedef struct LodePNGInfo
+{
+  /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/
+  unsigned compression_method;/*compression method of the original file. Always 0.*/
+  unsigned filter_method;     /*filter method of the original file*/
+  unsigned interlace_method;  /*interlace method of the original file*/
+  LodePNGColorMode color;     /*color type and bits, palette and transparency of the PNG file*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*
+  suggested background color chunk (bKGD)
+  This color uses the same color mode as the PNG (except alpha channel), which can be 1-bit to 16-bit.
+
+  For greyscale PNGs, r, g and b will all 3 be set to the same. When encoding
+  the encoder writes the red one. For palette PNGs: When decoding, the RGB value
+  will be stored, not a palette index. But when encoding, specify the index of
+  the palette in background_r, the other two are then ignored.
+
+  The decoder does not use this background color to edit the color of pixels.
+  */
+  unsigned background_defined; /*is a suggested background color given?*/
+  unsigned background_r;       /*red component of suggested background color*/
+  unsigned background_g;       /*green component of suggested background color*/
+  unsigned background_b;       /*blue component of suggested background color*/
+
+  /*
+  non-international text chunks (tEXt and zTXt)
+
+  The char** arrays each contain num strings. The actual messages are in
+  text_strings, while text_keys are keywords that give a short description what
+  the actual text represents, e.g. Title, Author, Description, or anything else.
+
+  A keyword is minimum 1 character and maximum 79 characters long. It's
+  discouraged to use a single line length longer than 79 characters for texts.
+
+  Don't allocate these text buffers yourself. Use the init/cleanup functions
+  correctly and use lodepng_add_text and lodepng_clear_text.
+  */
+  size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
+  char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
+  char** text_strings; /*the actual text*/
+
+  /*
+  international text chunks (iTXt)
+  Similar to the non-international text chunks, but with additional strings
+  "langtags" and "transkeys".
+  */
+  size_t itext_num; /*the amount of international texts in this PNG*/
+  char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
+  char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/
+  char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/
+  char** itext_strings; /*the actual international text - UTF-8 string*/
+
+  /*time chunk (tIME)*/
+  unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/
+  LodePNGTime time;
+
+  /*phys chunk (pHYs)*/
+  unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined; if 1, there is one*/
+  unsigned phys_x; /*pixels per unit in x direction*/
+  unsigned phys_y; /*pixels per unit in y direction*/
+  unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/
+
+  /*
+  unknown chunks
+  There are 3 buffers, one for each position in the PNG where unknown chunks can appear.
+  Each buffer contains all unknown chunks for that position consecutively.
+  The 3 buffers are the unknown chunks between certain critical chunks:
+  0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND
+  Do not allocate or traverse this data yourself. Use the chunk traversing functions declared
+  later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct.
+  */
+  unsigned char* unknown_chunks_data[3];
+  size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGInfo;
+
+/*init, cleanup and copy functions to use with the LodePNGInfo struct*/
+void lodepng_info_init(LodePNGInfo* info);
+void lodepng_info_cleanup(LodePNGInfo* info);
+/*dest must already be inited. Return value is error code (0 means no error)*/
+unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source);
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/
+unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts (key and str) at once*/
+
+void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+
+/*
+Converts raw buffer from one color type to another color type, based on
+LodePNGColorMode structs to describe the input and output color type.
+See the reference manual at the end of this header file to see which color conversions are supported.
+return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported)
+The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel
+of the output color type (lodepng_get_bpp).
+For < 8 bpp images, there should not be padding bits at the end of scanlines.
+For 16-bit per channel colors, uses big endian format like PNG does.
+w and h are the width and height of the image in pixels.
+*/
+unsigned lodepng_convert(unsigned char* out, const unsigned char* in,
+                         const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in,
+                         unsigned w, unsigned h);
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Settings for the decoder. This contains settings for the PNG and the Zlib
+decoder, but not the Info settings from the Info structs.
+*/
+typedef struct LodePNGDecoderSettings
+{
+  LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/
+
+  unsigned ignore_crc; /*ignore CRC checksums*/
+
+  unsigned color_convert; /*whether to convert the PNG to the color type you want (given in info_raw). Default: yes*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  unsigned read_text_chunks; /*whether to decode text chunks. If false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
+  /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
+  unsigned remember_unknown_chunks;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGDecoderSettings;
+
+void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Strategy the encoder uses to choose the PNG filter type of each scanline (see filter_strategy in LodePNGEncoderSettings)*/
+typedef enum LodePNGFilterStrategy
+{
+  /*use filter type zero for every scanline*/
+  LFS_ZERO,
+  /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/
+  LFS_MINSUM,
+  /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending
+  on the image, this is better or worse than minsum.*/
+  LFS_ENTROPY,
+  /*
+  Brute-force-search PNG filters by compressing each filter for each scanline.
+  Experimental, very slow, and only rarely gives better compression than MINSUM.
+  */
+  LFS_BRUTE_FORCE,
+  /*use predefined_filters buffer: you specify the filter type for each scanline*/
+  LFS_PREDEFINED
+} LodePNGFilterStrategy;
+
+/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding.
+Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/
+typedef struct LodePNGColorProfile
+{
+  unsigned colored; /*not greyscale*/
+  unsigned key; /*if true, image is not opaque. A color key is possible only if this is true and alpha is false.*/
+  unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/
+  unsigned short key_g;
+  unsigned short key_b;
+  unsigned alpha; /*alpha channel or alpha palette required*/
+  unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/
+  unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/
+  unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/
+} LodePNGColorProfile;
+
+void lodepng_color_profile_init(LodePNGColorProfile* profile);
+
+/*Get a LodePNGColorProfile of the image.*/
+unsigned lodepng_get_color_profile(LodePNGColorProfile* profile,
+                                   const unsigned char* image, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in);
+/*The function LodePNG uses internally to decide the PNG color with auto_convert.
+Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/
+unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
+                                   const unsigned char* image, unsigned w, unsigned h,
+                                   const LodePNGColorMode* mode_in);
+
+/*Settings for the encoder.*/
+typedef struct LodePNGEncoderSettings
+{
+  LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/
+
+  unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/
+
+  /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than
+  8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to
+  completely follow the official PNG heuristic, filter_palette_zero must be true and
+  filter_strategy must be LFS_MINSUM*/
+  unsigned filter_palette_zero;
+  /*Which filter strategy to use when not using zeroes due to filter_palette_zero.
+  Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/
+  LodePNGFilterStrategy filter_strategy;
+  /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with
+  the same length as the amount of scanlines in the image, and each value must be <= 5 (NOTE(review): PNG defines filter types 0-4 only; confirm this bound). You
+  have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero
+  must be set to 0 to ensure this is also used on palette or low bitdepth images.*/
+  const unsigned char* predefined_filters;
+
+  /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette).
+  If colortype is 3, PLTE is _always_ created.*/
+  unsigned force_palette;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*add LodePNG identifier and version as a text chunk, for debugging*/
+  unsigned add_id;
+  /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/
+  unsigned text_compression;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+} LodePNGEncoderSettings;
+
+void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER)
+/*The settings, state and information for extended encoding and decoding.*/
+typedef struct LodePNGState
+{
+#ifdef LODEPNG_COMPILE_DECODER
+  LodePNGDecoderSettings decoder; /*the decoding settings*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+  LodePNGEncoderSettings encoder; /*the encoding settings*/
+#endif /*LODEPNG_COMPILE_ENCODER*/
+  LodePNGColorMode info_raw; /*color mode of the raw pixel buffer: the format you want to get when decoding, the format you give when encoding*/
+  LodePNGInfo info_png; /*info of the PNG image: obtained after decoding, or filled in by you to describe the wanted PNG when encoding*/
+  unsigned error;
+#ifdef LODEPNG_COMPILE_CPP
+  /* For the lodepng::State subclass. */
+  virtual ~LodePNGState(){}
+#endif
+} LodePNGState;
+
+/*init, cleanup and copy functions to use with the LodePNGState struct*/
+void lodepng_state_init(LodePNGState* state);
+void lodepng_state_cleanup(LodePNGState* state);
+void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source);
+#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*
+Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and
+getting much more information about the PNG image and color mode.
+*/
+unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h,
+                        LodePNGState* state,
+                        const unsigned char* in, size_t insize);
+
+/*
+Read the PNG header, but not the actual data. This returns only the information
+that is in the header chunk of the PNG, such as width, height and color type. The
+information is placed in the info_png field of the LodePNGState.
+*/
+unsigned lodepng_inspect(unsigned* w, unsigned* h,
+                         LodePNGState* state,
+                         const unsigned char* in, size_t insize);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*Encodes the raw image using the settings in state. Allocates the out buffer with standard malloc and stores the size in *outsize. You need to free() the buffer yourself.*/
+unsigned lodepng_encode(unsigned char** out, size_t* outsize,
+                        const unsigned char* image, unsigned w, unsigned h,
+                        LodePNGState* state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*
+The lodepng_chunk functions are normally not needed, except to traverse the
+unknown chunks stored in the LodePNGInfo struct, or add new ones to it.
+It also allows traversing the chunks of an encoded PNG file yourself.
+
+PNG standard chunk naming conventions:
+First byte: uppercase = critical, lowercase = ancillary
+Second byte: uppercase = public, lowercase = private
+Third byte: must be uppercase
+Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy
+*/
+
+/*
+Gets the length of the data of the chunk. Total chunk length has 12 bytes more.
+There must be at least 4 bytes to read from. If the result value is too large,
+it may be corrupt data.
+*/
+unsigned lodepng_chunk_length(const unsigned char* chunk);
+
+/*puts the 4-byte type in null terminated string*/
+void lodepng_chunk_type(char type[5], const unsigned char* chunk);
+
+/*check if the type is the given type*/
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type);
+
+/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk);
+
+/*0: public, 1: private (see PNG standard)*/
+unsigned char lodepng_chunk_private(const unsigned char* chunk);
+
+/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk);
+
+/*get pointer to the data of the chunk, where the input points to the header of the chunk*/
+unsigned char* lodepng_chunk_data(unsigned char* chunk);
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk);
+
+/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk);
+
+/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/
+void lodepng_chunk_generate_crc(unsigned char* chunk);
+
+/*iterate to the next chunk. don't use on IEND chunk, as there is no next chunk then*/
+unsigned char* lodepng_chunk_next(unsigned char* chunk);
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk);
+
+/*
+Appends chunk to the data in out. The given chunk should already have its chunk header.
+The out variable and outlength are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
+
+/*
+Appends new chunk to out. The chunk to append is given by giving its length, type
+and data separately. The type is a 4-letter string.
+The out variable and outlength are updated to reflect the new reallocated buffer.
+Returns error code (0 if it went ok)
+*/
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data);
+
+
+/*Calculate CRC32 of buffer*/
+unsigned lodepng_crc32(const unsigned char* buf, size_t len);
+#endif /*LODEPNG_COMPILE_PNG*/
+
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*
+This zlib part can be used independently to zlib compress and decompress a
+buffer. It cannot be used to create gzip files however, and it only supports the
+part of zlib that is required for PNG, it does not support dictionaries.
+*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings);
+
+/*
+Decompresses Zlib data. Reallocates the out buffer and appends the data. The
+data must be according to the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/*
+Compresses data with Zlib. Reallocates the out buffer and appends the data.
+Zlib adds a small header and trailer around the deflate data.
+The data is output in the format of the zlib specification.
+Either, *out must be NULL and *outsize must be 0, or, *out must be a valid
+buffer and *outsize its size in bytes. out must be freed by user after usage.
+*/
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize,
+                               const unsigned char* in, size_t insize,
+                               const LodePNGCompressSettings* settings);
+
+/*
+Find length-limited Huffman code for given frequencies. This function is in the
+public interface only for tests, it's used internally by lodepng_deflate.
+*/
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen);
+
+/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings);
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into buffer. The function allocates the out buffer, and
+after usage you should free it.
+out: output parameter, contains pointer to loaded buffer.
+outsize: output parameter, size of the allocated out buffer
+filename: the path to the file to load
+return value: error code (0 means ok)
+*/
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename);
+
+/*
+Save a file from buffer to disk. Warning, if it exists, this function overwrites
+the file without warning!
+buffer: the buffer to write
+buffersize: size of the buffer to write
+filename: the path to the file to save to
+return value: error code (0 means ok)
+*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename);
+#endif /*LODEPNG_COMPILE_DISK*/
+
+#ifdef LODEPNG_COMPILE_CPP
+/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */
+namespace lodepng
+{
+#ifdef LODEPNG_COMPILE_PNG
+class State : public LodePNGState /*a LodePNGState with constructor and destructor, for use from C++*/
+{
+  public:
+    State();
+    State(const State& other);
+    virtual ~State();
+    State& operator=(const State& other);
+};
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Same as other lodepng::decode, but using a State for more settings and information. */
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const unsigned char* in, size_t insize);
+unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h,
+                State& state,
+                const std::vector<unsigned char>& in);
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/* Same as other lodepng::encode, but using a State for more settings and information. */
+unsigned encode(std::vector<unsigned char>& out,
+                const unsigned char* in, unsigned w, unsigned h,
+                State& state);
+unsigned encode(std::vector<unsigned char>& out,
+                const std::vector<unsigned char>& in, unsigned w, unsigned h,
+                State& state);
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DISK
+/*
+Load a file from disk into an std::vector.
+return value: error code (0 means ok)
+*/
+unsigned load_file(std::vector<unsigned char>& buffer, const std::string& filename);
+
+/*
+Save the binary data in an std::vector to a file on disk. The file is overwritten
+without warning.
+*/
+unsigned save_file(const std::vector<unsigned char>& buffer, const std::string& filename);
+#endif /* LODEPNG_COMPILE_DISK */
+#endif /* LODEPNG_COMPILE_PNG */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_DECODER
+/* Zlib-decompress an unsigned char buffer */
+unsigned decompress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+
+/* Zlib-decompress an std::vector */
+unsigned decompress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                    const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings);
+#endif /* LODEPNG_COMPILE_DECODER */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+/* Zlib-compress an unsigned char buffer */
+unsigned compress(std::vector<unsigned char>& out, const unsigned char* in, size_t insize,
+                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+
+/* Zlib-compress an std::vector */
+unsigned compress(std::vector<unsigned char>& out, const std::vector<unsigned char>& in,
+                  const LodePNGCompressSettings& settings = lodepng_default_compress_settings);
+#endif /* LODEPNG_COMPILE_ENCODER */
+#endif /* LODEPNG_COMPILE_ZLIB */
+} /* namespace lodepng */
+#endif /*LODEPNG_COMPILE_CPP*/
+
+/*
+TODO:
+[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often
+[.] check compatibility with various compilers  - done but needs to be redone for every newer version
+[X] converting color to 16-bit per channel types
+[ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values)
+[ ] make sure encoder generates no chunks with size > (2^31)-1
+[ ] partial decoding (stream processing)
+[X] let the "isFullyOpaque" function check color keys and transparent palettes too
+[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl"
+[ ] don't stop decoding on errors like 69, 57, 58 (make warnings)
+[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
+[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
+[ ] allow user to give data (void*) to custom allocator
+*/
+
+#endif /*LODEPNG_H inclusion guard*/
+
+/*
+LodePNG Documentation
+---------------------
+
+0. table of contents
+--------------------
+
+  1. about
+   1.1. supported features
+   1.2. features not supported
+  2. C and C++ version
+  3. security
+  4. decoding
+  5. encoding
+  6. color conversions
+    6.1. PNG color types
+    6.2. color conversions
+    6.3. padding bits
+    6.4. A note about 16-bits per channel and endianness
+  7. error values
+  8. chunks and PNG editing
+  9. compiler support
+  10. examples
+   10.1. decoder C++ example
+   10.2. decoder C example
+  11. state settings reference
+  12. changes
+  13. contact information
+
+
+1. about
+--------
+
+PNG is a file format to store raster images losslessly with good compression,
+supporting different color types and alpha channel.
+
+LodePNG is a PNG codec according to the Portable Network Graphics (PNG)
+Specification (Second Edition) - W3C Recommendation 10 November 2003.
+
+The specifications used are:
+
+*) Portable Network Graphics (PNG) Specification (Second Edition):
+     http://www.w3.org/TR/2003/REC-PNG-20031110
+*) RFC 1950 ZLIB Compressed Data Format version 3.3:
+     http://www.gzip.org/zlib/rfc-zlib.html
+*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3:
+     http://www.gzip.org/zlib/rfc-deflate.html
+
+The most recent version of LodePNG can currently be found at
+http://lodev.org/lodepng/
+
+LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds
+extra functionality.
+
+LodePNG consists of two files:
+-lodepng.h: the header file for both C and C++
+-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage
+
+If you want to start using LodePNG right away without reading this doc, get the
+examples from the LodePNG website to see how to use it in code, or check the
+smaller examples in chapter 10 here.
+
+LodePNG is simple but only supports the basic requirements. To achieve
+simplicity, the following design choices were made: There are no dependencies
+on any external library. There are functions to decode and encode a PNG with
+a single function call, and extended versions of these functions taking a
+LodePNGState struct allowing to specify or get more information. By default
+the colors of the raw image are always RGB or RGBA, no matter what color type
+the PNG file uses. To read and write files, there are simple functions to
+convert the files to/from buffers in memory.
+
+This all makes LodePNG suitable for loading textures in games, demos and small
+programs, ... It's less suitable for full fledged image editors, loading PNGs
+over network (it requires all the image data to be available before decoding can
+begin), life-critical systems, ...
+
+1.1. supported features
+-----------------------
+
+The following features are supported by the encoder and decoder:
+
+*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image,
+   or the same color type as the PNG
+*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image
+*) Adam7 interlace and deinterlace for any color type
+*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk
+*) support for alpha channels, including RGBA color model, translucent palettes and color keying
+*) zlib decompression (inflate)
+*) zlib compression (deflate)
+*) CRC32 and ADLER32 checksums
+*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks.
+*) the following chunks are supported (generated/interpreted) by both encoder and decoder:
+    IHDR: header information
+    PLTE: color palette
+    IDAT: pixel data
+    IEND: the final chunk
+    tRNS: transparency for palettized images
+    tEXt: textual information
+    zTXt: compressed textual information
+    iTXt: international textual information
+    bKGD: suggested background color
+    pHYs: physical dimensions
+    tIME: modification time
+
+1.2. features not supported
+---------------------------
+
+The following features are _not_ supported:
+
+*) some features needed to make a conformant PNG-Editor might be still missing.
+*) partial loading/stream processing. All data must be available and is processed in one call.
+*) The following public chunks are not supported but treated as unknown chunks by LodePNG
+    cHRM, gAMA, iCCP, sRGB, sBIT, hIST, sPLT
+   Some of these are not supported on purpose: LodePNG wants to provide the RGB values
+   stored in the pixels, not values modified by system dependent gamma or color models.
+
+
+2. C and C++ version
+--------------------
+
+The C version uses buffers allocated with malloc that you need to free()
+yourself. You need to use init and cleanup functions for each struct whenever
+using a struct from the C version to avoid exploits and memory leaks.
+
+The C++ version has extra functions with std::vectors in the interface and the
+lodepng::State class which is a LodePNGState with constructor and destructor.
+
+These files work without modification for both C and C++ compilers because all
+the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers
+ignore it, and the C code is made to compile both with strict ISO C90 and C++.
+
+To use the C++ version, you need to rename the source file to lodepng.cpp
+(instead of lodepng.c), and compile it with a C++ compiler.
+
+To use the C version, you need to rename the source file to lodepng.c (instead
+of lodepng.cpp), and compile it with a C compiler.
+
+
+3. Security
+-----------
+
+Even if carefully designed, it's always possible that LodePNG contains possible
+exploits. If you discover one, please let me know, and it will be fixed.
+
+When using LodePNG, care has to be taken with the C version of LodePNG, as well
+as the C-style structs when working with C++. The following conventions are used
+for all C-style structs:
+
+-if a struct has a corresponding init function, always call the init function when making a new one
+-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks
+-if a struct has a corresponding copy function, use the copy function instead of "=".
+ The destination must also be inited already.
+
+
+4. Decoding
+-----------
+
+Decoding converts a PNG compressed image to a raw pixel buffer.
+
+Most documentation on using the decoder is at its declarations in the header
+above. For C, simple decoding can be done with functions such as
+lodepng_decode32, and more advanced decoding can be done with the struct
+LodePNGState and lodepng_decode. For C++, all decoding can be done with the
+various lodepng::decode functions, and lodepng::State can be used for advanced
+features.
+
+When using the LodePNGState, it uses the following fields for decoding:
+*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here
+*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get
+*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use
+
+LodePNGInfo info_png
+--------------------
+
+After decoding, this contains extra information of the PNG image, except the actual
+pixels, width and height because these are already gotten directly from the decoder
+functions.
+
+It contains for example the original color type of the PNG image, text comments,
+suggested background color, etc... More details about the LodePNGInfo struct are
+at its declaration documentation.
+
+LodePNGColorMode info_raw
+-------------------------
+
+When decoding, here you can specify which color type you want
+the resulting raw image to be. If this is different from the colortype of the
+PNG, then the decoder will automatically convert the result. This conversion
+always works, except if you want it to convert a color PNG to greyscale or to
+a palette with missing colors.
+
+By default, 32-bit color is used for the result.
+
+LodePNGDecoderSettings decoder
+------------------------------
+
+The settings can be used to ignore the errors created by invalid CRC and Adler32
+checksums, and to disable the decoding of tEXt chunks.
+
+There's also a setting color_convert, true by default. If false, no conversion
+is done, the resulting data will be as it was in the PNG (after decompression)
+and you'll have to puzzle the colors of the pixels together yourself using the
+color type information in the LodePNGInfo.
+
+
+5. Encoding
+-----------
+
+Encoding converts a raw pixel buffer to a PNG compressed image.
+
+Most documentation on using the encoder is at its declarations in the header
+above. For C, simple encoding can be done with functions such as
+lodepng_encode32, and more advanced encoding can be done with the struct
+LodePNGState and lodepng_encode. For C++, all encoding can be done with the
+various lodepng::encode functions, and lodepng::State can be used for advanced
+features.
+
+Like the decoder, the encoder can also give errors. However it gives fewer errors
+since the encoder input is trusted, the decoder input (a PNG image that could
+be forged by anyone) is not trusted.
+
+When using the LodePNGState, it uses the following fields for encoding:
+*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be.
+*) LodePNGColorMode info_raw: here you say what color type the raw image (the input) has
+*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use
+
+LodePNGInfo info_png
+--------------------
+
+When encoding, you use this the opposite way as when decoding: for encoding,
+you fill in the values you want the PNG to have before encoding. By default it's
+not needed to specify a color type for the PNG since it's automatically chosen,
+but it's possible to choose it yourself given the right settings.
+
+The encoder will not always exactly match the LodePNGInfo struct you give,
+it tries to match it as closely as possible. Some things are ignored by the encoder. The
+encoder uses, for example, the following settings from it when applicable:
+colortype and bitdepth, text chunks, time chunk, the color key, the palette, the
+background color, the interlace method, unknown chunks, ...
+
+When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk.
+If the palette contains any colors for which the alpha channel is not 255 (so
+there are translucent colors in the palette), it'll add a tRNS chunk.
+
+LodePNGColorMode info_raw
+-------------------------
+
+You specify the color type of the raw image that you give to the input here,
+including a possible transparent color key and palette you happen to be using in
+your raw image data.
+
+By default, 32-bit color is assumed, meaning your input has to be in RGBA
+format with 4 bytes (unsigned chars) per pixel.
+
+LodePNGEncoderSettings encoder
+------------------------------
+
+The following settings are supported (some are in sub-structs):
+*) auto_convert: when this option is enabled, the encoder will
+automatically choose the smallest possible color mode (including color key) that
+can encode the colors of all pixels without information loss.
+*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree,
+   2 = dynamic huffman tree (best compression). Should be 2 for proper
+   compression.
+*) use_lz77: whether or not to use LZ77 for compressed block types. Should be
+   true for proper compression.
+*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value
+   2048 by default, but can be set to 32768 for better, but slow, compression.
+*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE
+   chunk if force_palette is true. This can be used as suggested palette to convert
+   to by viewers that don't support more than 256 colors (if those still exist)
+*) add_id: add text chunk "Encoder: LodePNG <version>" to the image.
+*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks.
+  zTXt chunks use zlib compression on the text. This gives a smaller result on
+  large texts but a larger result on small texts (such as a single program name).
+  It's all tEXt or all zTXt though, there's no separate setting per text yet.
+
+
+6. color conversions
+--------------------
+
+An important thing to note about LodePNG, is that the color type of the PNG, and
+the color type of the raw image, are completely independent. By default, when
+you decode a PNG, you get the result as a raw image in the color type you want,
+no matter whether the PNG was encoded with a palette, greyscale or RGBA color.
+And if you encode an image, by default LodePNG will automatically choose the PNG
+color type that gives good compression based on the values of colors and amount
+of colors in the image. It can be configured to let you control it instead as
+well, though.
+
+To be able to do this, LodePNG does conversions from one color mode to another.
+It can convert from almost any color type to any other color type, except the
+following conversions: RGB to greyscale is not supported, and converting to a
+palette when the palette doesn't have a required color is not supported. This is
+not supported on purpose: this is information loss which requires a color
+reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to grey
+is easy, but there are multiple ways if you want to give some channels more
+weight).
+
+By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB
+color, no matter what color type the PNG has. And by default when encoding,
+LodePNG automatically picks the best color model for the output PNG, and expects
+the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control
+the color format of the images yourself, you can skip this chapter.
+
+6.1. PNG color types
+--------------------
+
+A PNG image can have many color types, ranging from 1-bit color to 64-bit color,
+as well as palettized color modes. After the zlib decompression and unfiltering
+in the PNG image is done, the raw pixel data will have that color type and thus
+a certain amount of bits per pixel. If you want the output raw image after
+decoding to have another color type, a conversion is done by LodePNG.
+
+The PNG specification gives the following color types:
+
+0: greyscale, bit depths 1, 2, 4, 8, 16
+2: RGB, bit depths 8 and 16
+3: palette, bit depths 1, 2, 4 and 8
+4: greyscale with alpha, bit depths 8 and 16
+6: RGBA, bit depths 8 and 16
+
+Bit depth is the amount of bits per pixel per color channel. So the total amount
+of bits per pixel is: amount of channels * bitdepth.
+
+6.2. color conversions
+----------------------
+
+As explained in the sections about the encoder and decoder, you can specify
+color types and bit depths in info_png and info_raw to change the default
+behaviour.
+
+If, when decoding, you want the raw image to be something else than the default,
+you need to set the color type and bit depth you want in the LodePNGColorMode,
+or the parameters colortype and bitdepth of the simple decoding function.
+
+If, when encoding, you use another color type than the default in the raw input
+image, you need to specify its color type and bit depth in the LodePNGColorMode
+of the raw image, or use the parameters colortype and bitdepth of the simple
+encoding function.
+
+If, when encoding, you don't want LodePNG to choose the output PNG color type
+but control it yourself, you need to set auto_convert in the encoder settings
+to false, and specify the color type you want in the LodePNGInfo of the
+encoder (including palette: it can generate a palette if auto_convert is true,
+otherwise not).
+
+If the input and output color type differ (whether user chosen or auto chosen),
+LodePNG will do a color conversion, which follows the rules below, and may
+sometimes result in an error.
+
+To avoid some confusion:
+-the decoder converts from PNG to raw image
+-the encoder converts from raw image to PNG
+-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image
+-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG
+-when encoding, the color type in LodePNGInfo is ignored if auto_convert
+ is enabled, it is automatically generated instead
+-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original
+ PNG image, but it can be ignored since the raw image has the color type you requested instead
+-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion
+ between the color types is done if the color types are supported. If it is not
+ supported, an error is returned. If the types are the same, no conversion is done.
+-even though some conversions aren't supported, LodePNG supports loading PNGs from any
+ colortype and saving PNGs to any colortype, sometimes it just requires preparing
+ the raw image correctly before encoding.
+-both encoder and decoder use the same color converter.
+
+Non supported color conversions:
+-color to greyscale: no error is thrown, but the result will look ugly because
+only the red channel is taken
+-anything to palette when that palette does not have that color in it: in this
+case an error is thrown
+
+Supported color conversions:
+-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA
+-any grey or grey+alpha, to grey or grey+alpha
+-anything to a palette, as long as the palette has the requested colors in it
+-removing alpha channel
+-higher to smaller bitdepth, and vice versa
+
+If you want no color conversion to be done (e.g. for speed or control):
+-In the encoder, you can make it save a PNG with any color type by giving the
+raw color mode and LodePNGInfo the same color mode, and setting auto_convert to
+false.
+-In the decoder, you can make it store the pixel data in the same color type
+as the PNG has, by setting the color_convert setting to false. Settings in
+info_raw are then ignored.
+
+The function lodepng_convert does the color conversion. It is available in the
+interface but normally isn't needed since the encoder and decoder already call
+it.
+
+6.3. padding bits
+-----------------
+
+In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines
+have a bit amount that isn't a multiple of 8, then padding bits are used so that each
+scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output.
+The raw input image you give to the encoder, and the raw output image you get from the decoder
+will NOT have these padding bits, e.g. in the case of a 1-bit image with a width
+of 7 pixels, the first pixel of the second scanline will be the 8th bit of the first byte,
+not the first bit of a new byte.
+
+6.4. A note about 16-bits per channel and endianness
+----------------------------------------------------
+
+LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like
+for any other color format. The 16-bit values are stored in big endian (most
+significant byte first) in these arrays. This is the opposite order of the
+little endian used by x86 CPUs.
+
+LodePNG always uses big endian because the PNG file format does so internally.
+Conversions to formats other than the one PNG uses internally are not supported by
+LodePNG on purpose, there are myriads of formats, including endianness of 16-bit
+colors, the order in which you store R, G, B and A, and so on. Supporting and
+converting to/from all that is outside the scope of LodePNG.
+
+This may mean that, depending on your use case, you may want to convert the big
+endian output of LodePNG to little endian with a for loop. This is certainly not
+always needed, many applications and libraries support big endian 16-bit colors
+anyway, but it means you cannot simply cast the unsigned char* buffer to an
+unsigned short* buffer on x86 CPUs.
+
+
+7. error values
+---------------
+
+All functions in LodePNG that return an error code, return 0 if everything went
+OK, or a non-zero code if there was an error.
+
+The meaning of the LodePNG error values can be retrieved with the function
+lodepng_error_text: given the numerical error code, it returns a description
+of the error in English as a string.
+
+Check the implementation of lodepng_error_text to see the meaning of each code.
+
+
+8. chunks and PNG editing
+-------------------------
+
+If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG
+editor that should follow the rules about handling of unknown chunks, or if your
+program is able to read other types of chunks than the ones handled by LodePNG,
+then that's possible with the chunk functions of LodePNG.
+
+A PNG chunk has the following layout:
+
+4 bytes length
+4 bytes type name
+length bytes data
+4 bytes CRC
+
+8.1. iterating through chunks
+-----------------------------
+
+If you have a buffer containing the PNG image data, then the first chunk (the
+IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the
+signature of the PNG and are not part of a chunk. But if you start at byte 8
+then you have a chunk, and can check the following things of it.
+
+NOTE: none of these functions check for memory buffer boundaries. To avoid
+exploits, always make sure the buffer contains all the data of the chunks.
+When using lodepng_chunk_next, make sure the returned value is within the
+allocated memory.
+
+unsigned lodepng_chunk_length(const unsigned char* chunk):
+
+Get the length of the chunk's data. The total chunk length is this length + 12.
+
+void lodepng_chunk_type(char type[5], const unsigned char* chunk):
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type):
+
+Get the type of the chunk or compare if it's a certain type
+
+unsigned char lodepng_chunk_critical(const unsigned char* chunk):
+unsigned char lodepng_chunk_private(const unsigned char* chunk):
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk):
+
+Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are).
+Check if the chunk is private (public chunks are part of the standard, private ones not).
+Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical
+chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your
+program doesn't handle that type of unknown chunk.
+
+unsigned char* lodepng_chunk_data(unsigned char* chunk):
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk):
+
+Get a pointer to the start of the data of the chunk.
+
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk):
+void lodepng_chunk_generate_crc(unsigned char* chunk):
+
+Check if the crc is correct or generate a correct one.
+
+unsigned char* lodepng_chunk_next(unsigned char* chunk):
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk):
+
+Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these
+functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
+data available in the buffer to be able to go to the next chunk.
+
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data):
+
+These functions are used to create new chunks that are appended to the data in *out that has
+length *outlength. The append function appends an existing chunk to the new data. The create
+function creates a new chunk with the given parameters and appends it. Type is the 4-letter
+name of the chunk.
+
+8.2. chunks in info_png
+-----------------------
+
+The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3
+buffers (each with size) to contain 3 types of unknown chunks:
+the ones that come before the PLTE chunk, the ones that come between the PLTE
+and the IDAT chunks, and the ones that come after the IDAT chunks.
+It's necessary to make the distinction between these 3 cases because the PNG
+standard forces to keep the ordering of unknown chunks compared to the critical
+chunks, but does not force any other ordering rules.
+
+info_png.unknown_chunks_data[0] is the chunks before PLTE
+info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT
+info_png.unknown_chunks_data[2] is the chunks after IDAT
+
+The chunks in these 3 buffers can be iterated through and read by using the same
+way described in the previous subchapter.
+
+When using the decoder to decode a PNG, you can make it store all unknown chunks
+if you set the option settings.remember_unknown_chunks to 1. By default, this
+option is off (0).
+
+The encoder will always encode unknown chunks that are stored in the info_png.
+If you need it to add a particular chunk that isn't known by LodePNG, you can
+use lodepng_chunk_append or lodepng_chunk_create to add it to the chunk data in
+info_png.unknown_chunks_data[x].
+
+Chunks that are known by LodePNG should not be added in that way. E.g. to make
+LodePNG add a bKGD chunk, set background_defined to true and add the correct
+parameters there instead.
+
+
+9. compiler support
+-------------------
+
+No libraries other than the current standard C library are needed to compile
+LodePNG. For the C++ version, only the standard C++ library is needed on top.
+Add the files lodepng.c(pp) and lodepng.h to your project, include
+lodepng.h where needed, and your program can read/write PNG files.
+
+It is compatible with C90 and up, and C++03 and up.
+
+If performance is important, use optimization when compiling! For both the
+encoder and decoder, this makes a large difference.
+
+Make sure that LodePNG is compiled with the same compiler of the same version
+and with the same settings as the rest of the program, or the interfaces with
+std::vectors and std::strings in C++ can be incompatible.
+
+CHAR_BIT must be 8 or higher, because LodePNG uses unsigned chars for octets.
+
+*) gcc and g++
+
+LodePNG is developed in gcc so this compiler is natively supported. It gives no
+warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++
+version 4.7.1 on Linux, 32-bit and 64-bit.
+
+*) Clang
+
+Fully supported and warning-free.
+
+*) Mingw
+
+The Mingw compiler (a port of gcc for Windows) should be fully supported by
+LodePNG.
+
+*) Visual Studio and Visual C++ Express Edition
+
+LodePNG should be warning-free with warning level W4. Two warnings were disabled
+with pragmas though: warning 4244 about implicit conversions, and warning 4996
+where it wants to use a non-standard function fopen_s instead of the standard C
+fopen.
+
+Visual Studio may want "stdafx.h" files to be included in each source file and
+give an error "unexpected end of file while looking for precompiled header".
+This is not standard C++ and will not be added to the stock LodePNG. You can
+disable it for lodepng.cpp only by right clicking it, Properties, C/C++,
+Precompiled Headers, and set it to Not Using Precompiled Headers there.
+
+NOTE: Modern versions of VS should be fully supported, but old versions, e.g.
+VS6, are not guaranteed to work.
+
+*) Compilers on Macintosh
+
+LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for
+C and C++.
+
+*) Other Compilers
+
+If you encounter problems on any compilers, feel free to let me know and I may
+try to fix it if the compiler is modern and standards compliant.
+
+
+10. examples
+------------
+
+This decoder example shows the most basic usage of LodePNG. More complex
+examples can be found on the LodePNG website.
+
+10.1. decoder C++ example
+-------------------------
+
+#include "lodepng.h"
+#include <iostream>
+
+int main(int argc, char *argv[])
+{
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  //load and decode
+  std::vector<unsigned char> image;
+  unsigned width, height;
+  unsigned error = lodepng::decode(image, width, height, filename);
+
+  //if there's an error, display it
+  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+
+  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
+}
+
+10.2. decoder C example
+-----------------------
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[])
+{
+  unsigned error;
+  unsigned char* image;
+  unsigned width, height;
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  error = lodepng_decode32_file(&image, &width, &height, filename);
+
+  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+
+  / * use image here * /
+
+  free(image);
+  return 0;
+}
+
+11. state settings reference
+----------------------------
+
+A quick reference of some settings to set on the LodePNGState
+
+For decoding:
+
+state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
+state.decoder.zlibsettings.custom_...: use custom inflate function
+state.decoder.ignore_crc: ignore CRC checksums
+state.decoder.color_convert: convert internal PNG color to chosen one
+state.decoder.read_text_chunks: whether to read in text metadata chunks
+state.decoder.remember_unknown_chunks: whether to read in unknown chunks
+state.info_raw.colortype: desired color type for decoded image
+state.info_raw.bitdepth: desired bit depth for decoded image
+state.info_raw....: more color settings, see struct LodePNGColorMode
+state.info_png....: no settings for decoder but output, see struct LodePNGInfo
+
+For encoding:
+
+state.encoder.zlibsettings.btype: disable compression by setting it to 0
+state.encoder.zlibsettings.use_lz77: use LZ77 in compression
+state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
+state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
+state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
+state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
+state.encoder.zlibsettings.custom_...: use custom deflate function
+state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
+state.encoder.filter_palette_zero: PNG filter strategy for palette
+state.encoder.filter_strategy: PNG filter strategy to encode with
+state.encoder.force_palette: add palette even if not encoding to one
+state.encoder.add_id: add LodePNG identifier and version as a text chunk
+state.encoder.text_compression: use compressed text chunks for metadata
+state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide
+state.info_raw: more color settings, see struct LodePNGColorMode
+state.info_png.color.colortype: desired color type if auto_convert is false
+state.info_png.color.bitdepth: desired bit depth if auto_convert is false
+state.info_png.color....: more color settings, see struct LodePNGColorMode
+state.info_png....: more PNG related settings, see struct LodePNGInfo
+
+
+12. changes
+-----------
+
+The version number of LodePNG is the date of the change given in the format
+yyyymmdd.
+
+Some changes aren't backwards compatible. Those are indicated with a (!)
+symbol.
+
+*) 08 dec 2015: Made load_file function return error if file can't be opened.
+*) 24 okt 2015: Bugfix with decoding to palette output.
+*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding.
+*) 23 aug 2014: Reduced needless memory usage of decoder.
+*) 28 jun 2014: Removed fix_png setting, always support palette OOB for
+    simplicity. Made ColorProfile public.
+*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization.
+*) 22 dec 2013: Power of two windowsize required for optimization.
+*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
+*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
+*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_"
+    prefix for the custom allocators and made it possible with a new #define to
+    use custom ones in your project without needing to change lodepng's code.
+*) 28 jan 2013: Bugfix with color key.
+*) 27 okt 2012: Tweaks in text chunk keyword length error handling.
+*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode.
+    (no palette). Better deflate tree encoding. New compression tweak settings.
+    Faster color conversions while decoding. Some internal cleanups.
+*) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
+*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions
+    and made it work with function pointers instead.
+*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
+    and free functions and toggle #defines from compiler flags. Small fixes.
+*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible.
+*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed
+    redundant C++ codec classes. Reduced amount of structs. Everything changed,
+    but it is cleaner now imho and functionality remains the same. Also fixed
+    several bugs and shrunk the implementation code. Made new samples.
+*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best
+    PNG color model and bit depth, based on the amount and type of colors of the
+    raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
+*) 9 okt 2011: simpler hash chain implementation for the encoder.
+*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching.
+*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking.
+    A bug with the PNG filtertype heuristic was fixed, so that it chooses much
+    better ones (it's quite significant). A setting to do an experimental, slow,
+    brute force search for PNG filter types is added.
+*) 17 aug 2011 (!): changed some C zlib related function names.
+*) 16 aug 2011: made the code less wide (max 120 characters per line).
+*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
+*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
+*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman
+    to optimize long sequences of zeros.
+*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and
+    LodePNG_InfoColor_canHaveAlpha functions for convenience.
+*) 7 nov 2010: added LodePNG_error_text function to get error code description.
+*) 30 okt 2010: made decoding slightly faster
+*) 26 okt 2010: (!) changed some C function and struct names (more consistent).
+     Reorganized the documentation and the declaration order in the header.
+*) 08 aug 2010: only changed some comments and external samples.
+*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version.
+*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers.
+*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could
+    read by ignoring the problem but windows apps couldn't.
+*) 06 jun 2008: added more error checks for out of memory cases.
+*) 26 apr 2008: added a few more checks here and there to ensure more safety.
+*) 06 mar 2008: crash with encoding of strings fixed
+*) 02 feb 2008: support for international text chunks added (iTXt)
+*) 23 jan 2008: small cleanups, and #defines to divide code in sections
+*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor.
+*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder.
+*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added
+    Also various fixes, such as in the deflate and the padding bits code.
+*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved
+    filtering code of encoder.
+*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A
+    C++ wrapper around this provides an interface almost identical to before.
+    Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code
+    are together in these files but it works both for C and C++ compilers.
+*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks
+*) 30 aug 2007: bug fixed which makes this Borland C++ compatible
+*) 09 aug 2007: some VS2005 warnings removed again
+*) 21 jul 2007: deflate code placed in new namespace separate from zlib code
+*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images
+*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing
+    invalid std::vector element [0] fixed, and level 3 and 4 warnings removed
+*) 02 jun 2007: made the encoder add a tag with version by default
+*) 27 may 2007: zlib and png code separated (but still in the same file),
+    simple encoder/decoder functions added for more simple usage cases
+*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69),
+    moved some examples from here to lodepng_examples.cpp
+*) 12 may 2007: palette decoding bug fixed
+*) 24 apr 2007: changed the license from BSD to the zlib license
+*) 11 mar 2007: very simple addition: ability to encode bKGD chunks.
+*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding
+    palettized PNG images. Plus little interface change with palette and texts.
+*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes.
+    Fixed a bug where the end code of a block had length 0 in the Huffman tree.
+*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented
+    and supported by the encoder, resulting in smaller PNGs at the output.
+*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone.
+*) 24 jan 2007: gave encoder an error interface. Added color conversion from any
+    greyscale type to 8-bit greyscale with or without alpha.
+*) 21 jan 2007: (!) Totally changed the interface. It allows more color types
+    to convert to and is more uniform. See the manual for how it works now.
+*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days:
+    encode/decode custom tEXt chunks, separate classes for zlib & deflate, and
+    at last made the decoder give errors for incorrect Adler32 or Crc.
+*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel.
+*) 29 dec 2006: Added support for encoding images without alpha channel, and
+    cleaned out code as well as making certain parts faster.
+*) 28 dec 2006: Added "Settings" to the encoder.
+*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now.
+    Removed some code duplication in the decoder. Fixed little bug in an example.
+*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter.
+    Fixed a bug of the decoder with 16-bit per color.
+*) 15 okt 2006: Changed documentation structure
+*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the
+    given image buffer, however for now it's not compressed.
+*) 08 sep 2006: (!) Changed to interface with a Decoder class
+*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different
+    way. Renamed decodePNG to decodePNGGeneric.
+*) 29 jul 2006: (!) Changed the interface: image info is now returned as a
+    struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy.
+*) 28 jul 2006: Cleaned the code and added new error checks.
+    Corrected terminology "deflate" into "inflate".
+*) 23 jun 2006: Added SDL example in the documentation in the header, this
+    example allows easy debugging by displaying the PNG and its transparency.
+*) 22 jun 2006: (!) Changed way to obtain error value. Added
+    loadFile function for convenience. Made decodePNG32 faster.
+*) 21 jun 2006: (!) Changed type of info vector to unsigned.
+    Changed position of palette in info vector. Fixed an important bug that
+    happened on PNGs with an uncompressed block.
+*) 16 jun 2006: Internally changed unsigned into unsigned where
+    needed, and performed some optimizations.
+*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them
+    in LodePNG namespace. Changed the order of the parameters. Rewrote the
+    documentation in the header. Renamed files to lodepng.cpp and lodepng.h
+*) 22 apr 2006: Optimized and improved some code
+*) 07 sep 2005: (!) Changed to std::vector interface
+*) 12 aug 2005: Initial release (C++, decoder only)
+
+
+13. contact information
+-----------------------
+
+Feel free to contact me with suggestions, problems, comments, ... concerning
+LodePNG. If you encounter a PNG image that doesn't work properly with this
+decoder, feel free to send it and I'll use it to find and fix the problem.
+
+My email address is (puzzle the account and domain together with an @ symbol):
+Domain: gmail dot com.
+Account: lode dot vandevenne.
+
+
+Copyright (c) 2005-2016 Lode Vandevenne
+*/