diff --git a/AssetStudio.sln b/AssetStudio.sln index 1f8b678..91725e0 100644 --- a/AssetStudio.sln +++ b/AssetStudio.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27130.2024 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29806.167 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AssetStudioGUI", "AssetStudioGUI\AssetStudioGUI.csproj", "{24551E2D-E9B6-4CD6-8F2A-D9F4A13E7853}" EndProject @@ -11,6 +11,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AssetStudioUtility", "Asset EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AssetStudio", "AssetStudio\AssetStudio.csproj", "{AF56B63C-1764-41B7-9E60-8D485422AC3B}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Texture2DDecoder", "Texture2DDecoder\Texture2DDecoder.vcxproj", "{57CFF625-57AB-424A-9B6B-B5ED01282E92}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -51,6 +53,14 @@ Global {AF56B63C-1764-41B7-9E60-8D485422AC3B}.Release|x64.Build.0 = Release|Any CPU {AF56B63C-1764-41B7-9E60-8D485422AC3B}.Release|x86.ActiveCfg = Release|Any CPU {AF56B63C-1764-41B7-9E60-8D485422AC3B}.Release|x86.Build.0 = Release|Any CPU + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Debug|x64.ActiveCfg = Debug|x64 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Debug|x64.Build.0 = Debug|x64 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Debug|x86.ActiveCfg = Debug|Win32 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Debug|x86.Build.0 = Debug|Win32 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Release|x64.ActiveCfg = Release|x64 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Release|x64.Build.0 = Release|x64 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Release|x86.ActiveCfg = Release|Win32 + {57CFF625-57AB-424A-9B6B-B5ED01282E92}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE 
diff --git a/AssetStudio/Classes/Texture2D.cs b/AssetStudio/Classes/Texture2D.cs index cb6c755..e11671b 100644 --- a/AssetStudio/Classes/Texture2D.cs +++ b/AssetStudio/Classes/Texture2D.cs @@ -170,5 +170,11 @@ namespace AssetStudio R8, ETC_RGB4Crunched, ETC2_RGBA8Crunched, + ASTC_HDR_4x4, + ASTC_HDR_5x5, + ASTC_HDR_6x6, + ASTC_HDR_8x8, + ASTC_HDR_10x10, + ASTC_HDR_12x12, } } \ No newline at end of file diff --git a/AssetStudioGUI/Exporter.cs b/AssetStudioGUI/Exporter.cs index ed3b15e..468711e 100644 --- a/AssetStudioGUI/Exporter.cs +++ b/AssetStudioGUI/Exporter.cs @@ -12,11 +12,11 @@ namespace AssetStudioGUI { public static bool ExportTexture2D(AssetItem item, string exportPathName) { - var converter = new Texture2DConverter((Texture2D)item.Asset); + var m_Texture2D = (Texture2D)item.Asset; var convertTexture = (bool)Properties.Settings.Default["convertTexture"]; if (convertTexture) { - var bitmap = converter.ConvertToBitmap(true); + var bitmap = m_Texture2D.ConvertToBitmap(true); if (bitmap == null) return false; ImageFormat format = null; @@ -52,10 +52,10 @@ namespace AssetStudioGUI } else { - var exportFullName = exportPathName + item.Text + converter.GetExtensionName(); + var exportFullName = exportPathName + item.Text + ".tex"; if (ExportFileExists(exportFullName)) return false; - File.WriteAllBytes(exportFullName, converter.ConvertToContainer()); + File.WriteAllBytes(exportFullName, m_Texture2D.image_data.Value); return true; } } diff --git a/AssetStudioGUI/Libraries/FMOD LICENSE.TXT b/AssetStudioGUI/Libraries/FMOD LICENSE.TXT deleted file mode 100644 index ce8dd13..0000000 --- a/AssetStudioGUI/Libraries/FMOD LICENSE.TXT +++ /dev/null @@ -1,151 +0,0 @@ -FMOD, FMOD Ex, FMOD Designer and FMOD Studio are -Copyright 2005-2016 Firelight Technologies Pty, Ltd. 
- -GRANT OF LICENSE ----------------- -THIS END USER LICENSE AGREEMENT GRANTS THE USER, THE RIGHT TO USE FMOD, -IN ITS LIBRARY AND TOOL FORM, IN THEIR OWN PRODUCTS, BE THEY FOR PERSONAL, -EDUCATIONAL OR COMMERCIAL USE. -THE USER MUST ADHERE TO THE LICENSING MODEL PROVIDED BY FIRELIGHT -TECHNOLOGIES, AND MUST APPLY FOR A LICENSE IF NECESSARY. THE FOLLOWING -LICENSES ARE AVAILABLE. - -FMOD NON-COMMERCIAL LICENSE ------------------------------------- -IF YOUR PRODUCT IS NOT INTENDED FOR COMMERCIAL GAIN AND DOES NOT -INCLUDE THE FMOD LIBRARY FOR RESALE, LICENSE OR OTHER COMMERCIAL -DISTRIBUTION, THEN USE OF FMOD IS FREE OF CHARGE. THERE ARE NO -LICENSE FEES FOR NON-COMMERCIAL APPLICATIONS. -THE USER MAY USE THIS EULA AS EVIDENCE OF THEIR LICENSE WITHOUT -CONTACTING FIRELIGHT TECHNOLOGIES. - -CONDITIONS/LIMITATIONS: -- WHEN USING THIS LICENSE, THE FMOD LIBRARY CANNOT BE USED FOR - RESALE OR OTHER COMMERCIAL DISTRIBUTION -- THIS LICENSE CANNOT BE USED FOR PRODUCTS WHICH DO NOT MAKE - PROFIT BUT ARE STILL COMMERCIALLY RELEASED -- THIS LICENSE CANNOT BE USED FOR COMMERCIAL SERVICES, WHERE THE - EXECUTABLE CONTAINING FMOD IS NOT SOLD, BUT THE DATA IS. -- WHEN USING FMOD, A CREDIT LINE IS REQUIRED IN EITHER DOCUMENTATION, - OR 'ON SCREEN' FORMAT (IF POSSIBLE). IT SHOULD CONTAIN AT LEAST - THE WORDS "FMOD" (OR "FMOD STUDIO" IF APPLICABLE) AND - "FIRELIGHT TECHNOLOGIES." - LOGOS ARE AVAILABLE FOR BOX OR MANUAL ART, BUT ARE NOT MANDATORY. - AN EXAMPLE CREDIT COULD BE: - - FMOD Sound System, copyright Firelight Technologies Pty, Ltd., 1994-2016. - OR - FMOD Studio, copyright Firelight Technologies Pty, Ltd., 1994-2016. - OR - Audio Engine supplied by FMOD by Firelight Technologies. - - NOTE THIS IN ADVANCE, AS IT MUST BE DONE BEFORE SHIPPING YOUR - PRODUCT WITH FMOD. 
- -FMOD FREE FOR INDIES LICENSE (FMOD STUDIO ONLY) ------------------------------------------------- -INDIE DEVELOPERS ARE CONSIDERED BY OUR LICENSING MODEL, DEVELOPERS THAT DEVELOP -A TITLE FOR UNDER $100K USD (TYPICALLY CONSIDERED AN 'INDIE' TITLE) TOTAL -BUDGET, MEANING YOUR TOTAL COSTS ARE LESS THAN $100K USD AT TIME OF SHIPPING, -YOU CAN USE FMOD FOR FREE. - -CONDITIONS/LIMITATIONS -- PLEASE WRITE TO SALES@FMOD.COM WITH THE NAME OF YOUR TITLE, RELEASE DATE - AND PLATFORMS SO WE CAN REGISTER YOU IN OUR SYSTEM. -- THERE IS NO RESTRICTION ON PLATFORM, ANY PLATFORM COMBINATION MAY BE USED. -- INCOME IS NOT RELEVANT TO THE BUDGET LEVEL, IT MUST BE EXPENSE RELATED. -- WHEN USING FMOD, A CREDIT LINE IS REQUIRED IN EITHER DOCUMENTATION, - OR 'ON SCREEN' FORMAT (IF POSSIBLE). IT SHOULD CONTAIN AT LEAST - THE WORDS FMOD STUDIO AND FIRELIGHT TECHNOLOGIES. - LOGOS ARE AVAILABLE FOR BOX OR MANUAL ART, BUT ARE NOT MANDATORY. - AN EXAMPLE CREDIT COULD BE: - - FMOD STUDIO, COPYRIGHT FIRELIGHT TECHNOLOGIES PTY, LTD., 1994-2016. - -COMMERCIAL USAGE (FMOD EX AND FMOD STUDIO) ------------------------------------------- -IF THE PRODUCT THAT USES FMOD IS INTENDED TO GENERATE INCOME, VIA DIRECT SALES -OR INDIRECT REVENUE (SUCH AS ADVERTISING, DONATIONS, CONTRACT FEE) THEN THE -DEVELOPER MUST APPLY TO FIRELIGHT TECHNOLOGIES FOR A COMMERCIAL LICENSE (UNLESS -THE USER QUALIFIES FOR AN FMOD STUDIO 'INDIE LICENSE'). -TO APPLY FOR THIS LICENSE WRITE TO SALES@FMOD.COM WITH THE RELEVANT DETAILS. - -REDISTRIBUTION LICENSE (FMOD EX AND FMOD STUDIO) ------------------------------------------------- -IF THE USER WISHES TO REDISTRIBUTE FMOD AS PART OF AN ENGINE OR TOOL SOLUTION, -THE USER MUST APPLY TO FIRELIGHT TECHNOLOGIES TO BE GRANTED A 'REDISTRIBUTION -LICENSE'. -TO APPLY FOR THIS LICENSE WRITE TO SALES@FMOD.COM WITH THE RELEVANT DETAILS. 
- -WARRANTY AND LIMITATION OF LIABILITY ------------------------------------- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -FMOD Uses Ogg Vorbis codec. BSD license. ------------------------------------------ -Copyright (c) 2002, Xiph.org Foundation - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -- Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -- Neither the name of the Xiph.org Foundation nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION -OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -For Android platform code. --------------------------- -Copyright (C) 2010 The Android Open Source Project -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -SUCH DAMAGE. 
\ No newline at end of file diff --git a/AssetStudioGUI/Libraries/brotli_license.txt b/AssetStudioGUI/Libraries/brotli_license.txt deleted file mode 100644 index 0fe2422..0000000 --- a/AssetStudioGUI/Libraries/brotli_license.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/AssetStudioGUI/Libraries/crunch_license.txt b/AssetStudioGUI/Libraries/crunch_license.txt deleted file mode 100644 index 1b30d78..0000000 --- a/AssetStudioGUI/Libraries/crunch_license.txt +++ /dev/null @@ -1,22 +0,0 @@ -crunch/crnlib uses the ZLIB license: -http://opensource.org/licenses/Zlib - -Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. 
- -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software -in a product, an acknowledgment in the product documentation would be -appreciated but is not required. - -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. - -3. This notice may not be removed or altered from any source distribution. diff --git a/AssetStudioGUI/Libraries/texgenpack_license.txt b/AssetStudioGUI/Libraries/texgenpack_license.txt deleted file mode 100644 index 08d2fed..0000000 --- a/AssetStudioGUI/Libraries/texgenpack_license.txt +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (c) 2015 Harm Hanemaaijer - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
\ No newline at end of file diff --git a/AssetStudioGUI/Libraries/x64/PVRTexLib.dll b/AssetStudioGUI/Libraries/x64/PVRTexLib.dll deleted file mode 100644 index 6cf573b..0000000 Binary files a/AssetStudioGUI/Libraries/x64/PVRTexLib.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/PVRTexLibWrapper.dll b/AssetStudioGUI/Libraries/x64/PVRTexLibWrapper.dll deleted file mode 100644 index 9f40bd6..0000000 Binary files a/AssetStudioGUI/Libraries/x64/PVRTexLibWrapper.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/TextureConverter.dll b/AssetStudioGUI/Libraries/x64/TextureConverter.dll deleted file mode 100644 index 45cd4e9..0000000 Binary files a/AssetStudioGUI/Libraries/x64/TextureConverter.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/TextureConverterWrapper.dll b/AssetStudioGUI/Libraries/x64/TextureConverterWrapper.dll deleted file mode 100644 index dd7aa6f..0000000 Binary files a/AssetStudioGUI/Libraries/x64/TextureConverterWrapper.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/astc.dll b/AssetStudioGUI/Libraries/x64/astc.dll deleted file mode 100644 index 6921b51..0000000 Binary files a/AssetStudioGUI/Libraries/x64/astc.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/crunch.dll b/AssetStudioGUI/Libraries/x64/crunch.dll deleted file mode 100644 index 6509c7f..0000000 Binary files a/AssetStudioGUI/Libraries/x64/crunch.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/crunchunity.dll b/AssetStudioGUI/Libraries/x64/crunchunity.dll deleted file mode 100644 index 7516cfb..0000000 Binary files a/AssetStudioGUI/Libraries/x64/crunchunity.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x64/libfbxsdk.dll b/AssetStudioGUI/Libraries/x64/libfbxsdk.dll index d4e6a01..36017ba 100644 Binary files a/AssetStudioGUI/Libraries/x64/libfbxsdk.dll and b/AssetStudioGUI/Libraries/x64/libfbxsdk.dll differ diff --git a/AssetStudioGUI/Libraries/x64/texgenpack.dll 
b/AssetStudioGUI/Libraries/x64/texgenpack.dll deleted file mode 100644 index 48bb2c1..0000000 Binary files a/AssetStudioGUI/Libraries/x64/texgenpack.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/PVRTexLib.dll b/AssetStudioGUI/Libraries/x86/PVRTexLib.dll deleted file mode 100644 index 602d264..0000000 Binary files a/AssetStudioGUI/Libraries/x86/PVRTexLib.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/PVRTexLibWrapper.dll b/AssetStudioGUI/Libraries/x86/PVRTexLibWrapper.dll deleted file mode 100644 index 7c7db83..0000000 Binary files a/AssetStudioGUI/Libraries/x86/PVRTexLibWrapper.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/TextureConverter.dll b/AssetStudioGUI/Libraries/x86/TextureConverter.dll deleted file mode 100644 index 5a7c3e7..0000000 Binary files a/AssetStudioGUI/Libraries/x86/TextureConverter.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/TextureConverterWrapper.dll b/AssetStudioGUI/Libraries/x86/TextureConverterWrapper.dll deleted file mode 100644 index 551f1e8..0000000 Binary files a/AssetStudioGUI/Libraries/x86/TextureConverterWrapper.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/astc.dll b/AssetStudioGUI/Libraries/x86/astc.dll deleted file mode 100644 index b62429d..0000000 Binary files a/AssetStudioGUI/Libraries/x86/astc.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/crunch.dll b/AssetStudioGUI/Libraries/x86/crunch.dll deleted file mode 100644 index 749ec02..0000000 Binary files a/AssetStudioGUI/Libraries/x86/crunch.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/crunchunity.dll b/AssetStudioGUI/Libraries/x86/crunchunity.dll deleted file mode 100644 index 90cf87a..0000000 Binary files a/AssetStudioGUI/Libraries/x86/crunchunity.dll and /dev/null differ diff --git a/AssetStudioGUI/Libraries/x86/libfbxsdk.dll b/AssetStudioGUI/Libraries/x86/libfbxsdk.dll index 656413b..27e216f 100644 Binary files 
a/AssetStudioGUI/Libraries/x86/libfbxsdk.dll and b/AssetStudioGUI/Libraries/x86/libfbxsdk.dll differ diff --git a/AssetStudioGUI/Libraries/x86/texgenpack.dll b/AssetStudioGUI/Libraries/x86/texgenpack.dll deleted file mode 100644 index dea4675..0000000 Binary files a/AssetStudioGUI/Libraries/x86/texgenpack.dll and /dev/null differ diff --git a/AssetStudioUtility/AssetStudioUtility.csproj b/AssetStudioUtility/AssetStudioUtility.csproj index bba572b..12fb331 100644 --- a/AssetStudioUtility/AssetStudioUtility.csproj +++ b/AssetStudioUtility/AssetStudioUtility.csproj @@ -73,6 +73,7 @@ + @@ -83,6 +84,10 @@ {af56b63c-1764-41b7-9e60-8d485422ac3b} AssetStudio + + {57cff625-57ab-424a-9b6b-b5ed01282e92} + Texture2DDecoder + \ No newline at end of file diff --git a/AssetStudioUtility/Texture2DConverter.cs b/AssetStudioUtility/Texture2DConverter.cs index 2bdf1d1..d205e45 100644 --- a/AssetStudioUtility/Texture2DConverter.cs +++ b/AssetStudioUtility/Texture2DConverter.cs @@ -1,76 +1,21 @@ using System; using System.Drawing; using System.Drawing.Imaging; -using System.IO; using System.Linq; using System.Runtime.InteropServices; +using Texture2DDecoder; namespace AssetStudio { public class Texture2DConverter { - //Texture2D private int m_Width; private int m_Height; private TextureFormat m_TextureFormat; private int image_data_size; private byte[] image_data; private int[] version; - - //DDS Start - private byte[] dwMagic = { 0x44, 0x44, 0x53, 0x20, 0x7c }; - private int dwFlags = 0x1 + 0x2 + 0x4 + 0x1000; - //public int dwHeight; m_Height - //public int dwWidth; m_Width - private int dwPitchOrLinearSize; - private int dwMipMapCount = 0x1; - private int dwSize = 0x20; - private int dwFlags2; - private int dwFourCC; - private int dwRGBBitCount; - private int dwRBitMask; - private int dwGBitMask; - private int dwBBitMask; - private int dwABitMask; - private int dwCaps = 0x1000; - private int dwCaps2 = 0x0; - //DDS End - //PVR Start - private int pvrVersion = 0x03525650; - private 
int pvrFlags = 0x0; - private long pvrPixelFormat; - private int pvrColourSpace = 0x0; - private int pvrChannelType = 0x0; - //public int pvrHeight; m_Height - //public int pvrWidth; m_Width - private int pvrDepth = 0x1; - private int pvrNumSurfaces = 0x1; //For texture arrays - private int pvrNumFaces = 0x1; //For cube maps - //public int pvrMIPMapCount; dwMipMapCount - private int pvrMetaDataSize = 0x0; - //PVR End - //KTX Start - private int glType = 0; - private int glTypeSize = 1; - private int glFormat = 0; - private int glInternalFormat; - private int glBaseInternalFormat; - //public int pixelWidth; m_Width - //public int pixelHeight; m_Height - private int pixelDepth = 0; - private int numberOfArrayElements = 0; - private int numberOfFaces = 1; - private int numberOfMipmapLevels = 1; - private int bytesOfKeyValueData = 0; - //KTX End - //TextureConverter - private QFORMAT q_format; - //texgenpack - private texgenpack_texturetype texturetype; - //astc - private int astcBlockWidth; - private int astcBlockHeight; - + private BuildTarget platform; public Texture2DConverter(Texture2D m_Texture2D) { @@ -81,1128 +26,704 @@ namespace AssetStudio m_Width = m_Texture2D.m_Width; m_Height = m_Texture2D.m_Height; m_TextureFormat = m_Texture2D.m_TextureFormat; - var mMipMap = m_Texture2D.m_MipMap; version = m_Texture2D.version; - var platform = m_Texture2D.platform; - - if (version[0] < 5 || (version[0] == 5 && version[1] < 2))//5.2 down - { - if (mMipMap) - { - dwFlags += 0x20000; - dwMipMapCount = Convert.ToInt32(Math.Log(Math.Max(m_Width, m_Height)) / Math.Log(2)); - dwCaps += 0x400008; - } - } - else - { - dwFlags += 0x20000; - dwMipMapCount = m_Texture2D.m_MipCount; - dwCaps += 0x400008; - } - - - switch (m_TextureFormat) - { - //TODO 导出到DDS容器时应该用原像素还是转换以后的像素? 
- case TextureFormat.Alpha8: //test pass - { - /*dwFlags2 = 0x2; - dwRGBBitCount = 0x8; - dwRBitMask = 0x0; - dwGBitMask = 0x0; - dwBBitMask = 0x0; - dwABitMask = 0xFF; */ - - //转BGRA32 - var BGRA32 = Enumerable.Repeat(0xFF, image_data_size * 4).ToArray(); - for (var i = 0; i < image_data_size; i++) - { - BGRA32[i * 4 + 3] = image_data[i]; - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.ARGB4444: //test pass - { - SwapBytesForXbox(platform); - - /*dwFlags2 = 0x41; - dwRGBBitCount = 0x10; - dwRBitMask = 0xF00; - dwGBitMask = 0xF0; - dwBBitMask = 0xF; - dwABitMask = 0xF000;*/ - - //转BGRA32 - var BGRA32 = new byte[image_data_size * 2]; - for (var i = 0; i < image_data_size / 2; i++) - { - var pixelNew = new byte[4]; - var pixelOldShort = BitConverter.ToUInt16(image_data, i * 2); - pixelNew[0] = (byte)(pixelOldShort & 0x000f); - pixelNew[1] = (byte)((pixelOldShort & 0x00f0) >> 4); - pixelNew[2] = (byte)((pixelOldShort & 0x0f00) >> 8); - pixelNew[3] = (byte)((pixelOldShort & 0xf000) >> 12); - // convert range - for (var j = 0; j < 4; j++) - pixelNew[j] = (byte)((pixelNew[j] << 4) | pixelNew[j]); - pixelNew.CopyTo(BGRA32, i * 4); - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.RGB24: //test pass - { - /*dwFlags2 = 0x40; - dwRGBBitCount = 0x18; - dwRBitMask = 0xFF; - dwGBitMask = 0xFF00; - dwBBitMask = 0xFF0000; - dwABitMask = 0x0;*/ - - //转BGRA32 - var BGRA32 = new byte[image_data_size / 3 * 4]; - for (var i = 0; i < image_data_size / 3; i++) - { - BGRA32[i * 4] = image_data[i * 3 + 2]; - BGRA32[i * 4 + 1] = image_data[i * 3 + 1]; - BGRA32[i * 4 + 2] = image_data[i * 3 + 0]; - BGRA32[i * 4 + 3] = 255; - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.RGBA32: //test pass - { - /*dwFlags2 = 0x41; - dwRGBBitCount = 0x20; - dwRBitMask = 0xFF; - dwGBitMask = 0xFF00; - dwBBitMask = 0xFF0000; - dwABitMask = -16777216;*/ - - //转BGRA32 - var BGRA32 = new byte[image_data_size]; - for (var i = 0; i < image_data_size; i += 4) - { - 
BGRA32[i] = image_data[i + 2]; - BGRA32[i + 1] = image_data[i + 1]; - BGRA32[i + 2] = image_data[i + 0]; - BGRA32[i + 3] = image_data[i + 3]; - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.ARGB32://test pass - { - /*dwFlags2 = 0x41; - dwRGBBitCount = 0x20; - dwRBitMask = 0xFF00; - dwGBitMask = 0xFF0000; - dwBBitMask = -16777216; - dwABitMask = 0xFF;*/ - - //转BGRA32 - var BGRA32 = new byte[image_data_size]; - for (var i = 0; i < image_data_size; i += 4) - { - BGRA32[i] = image_data[i + 3]; - BGRA32[i + 1] = image_data[i + 2]; - BGRA32[i + 2] = image_data[i + 1]; - BGRA32[i + 3] = image_data[i + 0]; - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.RGB565: //test pass - { - SwapBytesForXbox(platform); - - dwFlags2 = 0x40; - dwRGBBitCount = 0x10; - dwRBitMask = 0xF800; - dwGBitMask = 0x7E0; - dwBBitMask = 0x1F; - dwABitMask = 0x0; - break; - } - case TextureFormat.R16: //test pass - { - //转BGRA32 - var BGRA32 = new byte[image_data_size * 2]; - for (var i = 0; i < image_data_size; i += 2) - { - float f = Half.ToHalf(image_data, i); - BGRA32[i * 2 + 2] = (byte)Math.Ceiling(f * 255);//R - BGRA32[i * 2 + 3] = 255;//A - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.DXT1: //test pass - case TextureFormat.DXT1Crunched: //test pass - { - SwapBytesForXbox(platform); - - if (mMipMap) - { - dwPitchOrLinearSize = m_Height * m_Width / 2; - } - dwFlags2 = 0x4; - dwFourCC = 0x31545844; - dwRGBBitCount = 0x0; - dwRBitMask = 0x0; - dwGBitMask = 0x0; - dwBBitMask = 0x0; - dwABitMask = 0x0; - - q_format = QFORMAT.Q_FORMAT_S3TC_DXT1_RGB; - break; - } - case TextureFormat.DXT5: //test pass - case TextureFormat.DXT5Crunched: //test pass - { - SwapBytesForXbox(platform); - - if (mMipMap) - { - dwPitchOrLinearSize = m_Height * m_Width / 2; - } - dwFlags2 = 0x4; - dwFourCC = 0x35545844; - dwRGBBitCount = 0x0; - dwRBitMask = 0x0; - dwGBitMask = 0x0; - dwBBitMask = 0x0; - dwABitMask = 0x0; - - q_format = QFORMAT.Q_FORMAT_S3TC_DXT5_RGBA; - break; - } - 
case TextureFormat.RGBA4444: //test pass - { - /*dwFlags2 = 0x41; - dwRGBBitCount = 0x10; - dwRBitMask = 0xF000; - dwGBitMask = 0xF00; - dwBBitMask = 0xF0; - dwABitMask = 0xF;*/ - - //转BGRA32 - var BGRA32 = new byte[image_data_size * 2]; - for (var i = 0; i < image_data_size / 2; i++) - { - var pixelNew = new byte[4]; - var pixelOldShort = BitConverter.ToUInt16(image_data, i * 2); - pixelNew[0] = (byte)((pixelOldShort & 0x00f0) >> 4); - pixelNew[1] = (byte)((pixelOldShort & 0x0f00) >> 8); - pixelNew[2] = (byte)((pixelOldShort & 0xf000) >> 12); - pixelNew[3] = (byte)(pixelOldShort & 0x000f); - // convert range - for (var j = 0; j < 4; j++) - pixelNew[j] = (byte)((pixelNew[j] << 4) | pixelNew[j]); - pixelNew.CopyTo(BGRA32, i * 4); - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.BGRA32: //test pass - { - dwFlags2 = 0x41; - dwRGBBitCount = 0x20; - dwRBitMask = 0xFF0000; - dwGBitMask = 0xFF00; - dwBBitMask = 0xFF; - dwABitMask = -16777216; - break; - } - case TextureFormat.RHalf: //test pass - { - q_format = QFORMAT.Q_FORMAT_R_16F; - glInternalFormat = KTXHeader.GL_R16F; - glBaseInternalFormat = KTXHeader.GL_RED; - break; - } - case TextureFormat.RGHalf: //test pass - { - q_format = QFORMAT.Q_FORMAT_RG_HF; - glInternalFormat = KTXHeader.GL_RG16F; - glBaseInternalFormat = KTXHeader.GL_RG; - break; - } - case TextureFormat.RGBAHalf: //test pass - { - q_format = QFORMAT.Q_FORMAT_RGBA_HF; - glInternalFormat = KTXHeader.GL_RGBA16F; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.RFloat: //test pass - { - q_format = QFORMAT.Q_FORMAT_R_F; - glInternalFormat = KTXHeader.GL_R32F; - glBaseInternalFormat = KTXHeader.GL_RED; - break; - } - case TextureFormat.RGFloat: //test pass - { - q_format = QFORMAT.Q_FORMAT_RG_F; - glInternalFormat = KTXHeader.GL_RG32F; - glBaseInternalFormat = KTXHeader.GL_RG; - break; - } - case TextureFormat.RGBAFloat: //test pass - { - q_format = QFORMAT.Q_FORMAT_RGBA_F; - glInternalFormat = 
KTXHeader.GL_RGBA32F; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.YUY2: //test pass - { - pvrPixelFormat = 17; - break; - } - case TextureFormat.RGB9e5Float: //TODO Test failure - { - q_format = QFORMAT.Q_FORMAT_RGB9_E5; - break; - } - case TextureFormat.BC4: //test pass - { - texturetype = texgenpack_texturetype.RGTC1; - glInternalFormat = KTXHeader.GL_COMPRESSED_RED_RGTC1; - glBaseInternalFormat = KTXHeader.GL_RED; - break; - } - case TextureFormat.BC5: //test pass - { - texturetype = texgenpack_texturetype.RGTC2; - glInternalFormat = KTXHeader.GL_COMPRESSED_RG_RGTC2; - glBaseInternalFormat = KTXHeader.GL_RG; - break; - } - case TextureFormat.BC6H: //test pass - { - texturetype = texgenpack_texturetype.BPTC_FLOAT; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; - glBaseInternalFormat = KTXHeader.GL_RGB; - break; - } - case TextureFormat.BC7: //test pass - { - texturetype = texgenpack_texturetype.BPTC; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGBA_BPTC_UNORM; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.PVRTC_RGB2: //test pass - { - pvrPixelFormat = 0; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGB_PVRTC_2BPPV1_IMG; - glBaseInternalFormat = KTXHeader.GL_RGB; - break; - } - case TextureFormat.PVRTC_RGBA2: //test pass - { - pvrPixelFormat = 1; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.PVRTC_RGB4: //test pass - { - pvrPixelFormat = 2; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGB_PVRTC_4BPPV1_IMG; - glBaseInternalFormat = KTXHeader.GL_RGB; - break; - } - case TextureFormat.PVRTC_RGBA4: //test pass - { - pvrPixelFormat = 3; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.ETC_RGB4Crunched: //test pass - case TextureFormat.ETC_RGB4_3DS: //test pass - case 
TextureFormat.ETC_RGB4: //test pass - { - pvrPixelFormat = 6; - glInternalFormat = KTXHeader.GL_ETC1_RGB8_OES; - glBaseInternalFormat = KTXHeader.GL_RGB; - break; - } - case TextureFormat.ATC_RGB4: //test pass - { - q_format = QFORMAT.Q_FORMAT_ATITC_RGB; - glInternalFormat = KTXHeader.GL_ATC_RGB_AMD; - glBaseInternalFormat = KTXHeader.GL_RGB; - break; - } - case TextureFormat.ATC_RGBA8: //test pass - { - q_format = QFORMAT.Q_FORMAT_ATC_RGBA_INTERPOLATED_ALPHA; - glInternalFormat = KTXHeader.GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.EAC_R: //test pass - { - q_format = QFORMAT.Q_FORMAT_EAC_R_UNSIGNED; - glInternalFormat = KTXHeader.GL_COMPRESSED_R11_EAC; - glBaseInternalFormat = KTXHeader.GL_RED; - break; - } - case TextureFormat.EAC_R_SIGNED: //test pass - { - q_format = QFORMAT.Q_FORMAT_EAC_R_SIGNED; - glInternalFormat = KTXHeader.GL_COMPRESSED_SIGNED_R11_EAC; - glBaseInternalFormat = KTXHeader.GL_RED; - break; - } - case TextureFormat.EAC_RG: //test pass - { - q_format = QFORMAT.Q_FORMAT_EAC_RG_UNSIGNED; - glInternalFormat = KTXHeader.GL_COMPRESSED_RG11_EAC; - glBaseInternalFormat = KTXHeader.GL_RG; - break; - } - case TextureFormat.EAC_RG_SIGNED: //test pass - { - q_format = QFORMAT.Q_FORMAT_EAC_RG_SIGNED; - glInternalFormat = KTXHeader.GL_COMPRESSED_SIGNED_RG11_EAC; - glBaseInternalFormat = KTXHeader.GL_RG; - break; - } - case TextureFormat.ETC2_RGB: //test pass - { - pvrPixelFormat = 22; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGB8_ETC2; - glBaseInternalFormat = KTXHeader.GL_RGB; - break; - } - case TextureFormat.ETC2_RGBA1: //test pass - { - pvrPixelFormat = 24; - glInternalFormat = KTXHeader.GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.ETC2_RGBA8Crunched: //test pass - case TextureFormat.ETC_RGBA8_3DS: //test pass - case TextureFormat.ETC2_RGBA8: //test pass - { - pvrPixelFormat = 23; - glInternalFormat = 
KTXHeader.GL_COMPRESSED_RGBA8_ETC2_EAC; - glBaseInternalFormat = KTXHeader.GL_RGBA; - break; - } - case TextureFormat.ASTC_RGB_4x4: //test pass - case TextureFormat.ASTC_RGBA_4x4: //test pass - { - astcBlockWidth = 4; - astcBlockHeight = 4; - break; - } - case TextureFormat.ASTC_RGB_5x5: //test pass - case TextureFormat.ASTC_RGBA_5x5: //test pass - { - astcBlockWidth = 5; - astcBlockHeight = 5; - break; - } - case TextureFormat.ASTC_RGB_6x6: //test pass - case TextureFormat.ASTC_RGBA_6x6: //test pass - { - astcBlockWidth = 6; - astcBlockHeight = 6; - break; - } - case TextureFormat.ASTC_RGB_8x8: //test pass - case TextureFormat.ASTC_RGBA_8x8: //test pass - { - astcBlockWidth = 8; - astcBlockHeight = 8; - break; - } - case TextureFormat.ASTC_RGB_10x10: //test pass - case TextureFormat.ASTC_RGBA_10x10: //test pass - { - astcBlockWidth = 10; - astcBlockHeight = 10; - break; - } - case TextureFormat.ASTC_RGB_12x12: //test pass - case TextureFormat.ASTC_RGBA_12x12: //test pass - { - astcBlockWidth = 12; - astcBlockHeight = 12; - break; - } - case TextureFormat.RG16: //test pass - { - //转BGRA32 - var BGRA32 = new byte[image_data_size * 2]; - for (var i = 0; i < image_data_size; i += 2) - { - BGRA32[i * 2 + 1] = image_data[i + 1];//G - BGRA32[i * 2 + 2] = image_data[i];//R - BGRA32[i * 2 + 3] = 255;//A - } - SetBGRA32Info(BGRA32); - break; - } - case TextureFormat.R8: //test pass - { - //转BGRA32 - var BGRA32 = new byte[image_data_size * 4]; - for (var i = 0; i < image_data_size; i++) - { - BGRA32[i * 4 + 2] = image_data[i];//R - BGRA32[i * 4 + 3] = 255;//A - } - SetBGRA32Info(BGRA32); - break; - } - } - } - - private void SetBGRA32Info(byte[] BGRA32) - { - image_data = BGRA32; - image_data_size = BGRA32.Length; - dwFlags2 = 0x41; - dwRGBBitCount = 0x20; - dwRBitMask = 0xFF0000; - dwGBitMask = 0xFF00; - dwBBitMask = 0xFF; - dwABitMask = -16777216; - } - - private void SwapBytesForXbox(BuildTarget platform) - { - if (platform == BuildTarget.XBOX360) //swap bytes for Xbox 
confirmed, PS3 not encountered - { - for (var i = 0; i < image_data_size / 2; i++) - { - var b0 = image_data[i * 2]; - image_data[i * 2] = image_data[i * 2 + 1]; - image_data[i * 2 + 1] = b0; - } - } - } - - public string GetExtensionName() - { - switch (m_TextureFormat) - { - case TextureFormat.Alpha8: - case TextureFormat.ARGB4444: - case TextureFormat.RGB24: - case TextureFormat.RGBA32: - case TextureFormat.ARGB32: - case TextureFormat.RGB565: - case TextureFormat.R16: - case TextureFormat.DXT1: - case TextureFormat.DXT5: - case TextureFormat.RGBA4444: - case TextureFormat.BGRA32: - case TextureFormat.RG16: - case TextureFormat.R8: - return ".dds"; - case TextureFormat.DXT1Crunched: - case TextureFormat.DXT5Crunched: - case TextureFormat.ETC_RGB4Crunched: - case TextureFormat.ETC2_RGBA8Crunched: - return ".crn"; - case TextureFormat.YUY2: - case TextureFormat.PVRTC_RGB2: - case TextureFormat.PVRTC_RGBA2: - case TextureFormat.PVRTC_RGB4: - case TextureFormat.PVRTC_RGBA4: - case TextureFormat.ETC_RGB4: - case TextureFormat.ETC2_RGB: - case TextureFormat.ETC2_RGBA1: - case TextureFormat.ETC2_RGBA8: - case TextureFormat.ASTC_RGB_4x4: - case TextureFormat.ASTC_RGB_5x5: - case TextureFormat.ASTC_RGB_6x6: - case TextureFormat.ASTC_RGB_8x8: - case TextureFormat.ASTC_RGB_10x10: - case TextureFormat.ASTC_RGB_12x12: - case TextureFormat.ASTC_RGBA_4x4: - case TextureFormat.ASTC_RGBA_5x5: - case TextureFormat.ASTC_RGBA_6x6: - case TextureFormat.ASTC_RGBA_8x8: - case TextureFormat.ASTC_RGBA_10x10: - case TextureFormat.ASTC_RGBA_12x12: - case TextureFormat.ETC_RGB4_3DS: - case TextureFormat.ETC_RGBA8_3DS: - return ".pvr"; - case TextureFormat.RHalf: - case TextureFormat.RGHalf: - case TextureFormat.RGBAHalf: - case TextureFormat.RFloat: - case TextureFormat.RGFloat: - case TextureFormat.RGBAFloat: - case TextureFormat.BC4: - case TextureFormat.BC5: - case TextureFormat.BC6H: - case TextureFormat.BC7: - case TextureFormat.ATC_RGB4: - case TextureFormat.ATC_RGBA8: - case 
TextureFormat.EAC_R: - case TextureFormat.EAC_R_SIGNED: - case TextureFormat.EAC_RG: - case TextureFormat.EAC_RG_SIGNED: - return ".ktx"; - default: - return ".tex"; - } - } - - - public byte[] ConvertToContainer() - { - if (image_data == null || image_data.Length == 0) - return null; - switch (m_TextureFormat) - { - case TextureFormat.Alpha8: - case TextureFormat.ARGB4444: - case TextureFormat.RGB24: - case TextureFormat.RGBA32: - case TextureFormat.ARGB32: - case TextureFormat.RGB565: - case TextureFormat.R16: - case TextureFormat.DXT1: - case TextureFormat.DXT5: - case TextureFormat.RGBA4444: - case TextureFormat.BGRA32: - case TextureFormat.RG16: - case TextureFormat.R8: - return ConvertToDDS(); - case TextureFormat.YUY2: - case TextureFormat.PVRTC_RGB2: - case TextureFormat.PVRTC_RGBA2: - case TextureFormat.PVRTC_RGB4: - case TextureFormat.PVRTC_RGBA4: - case TextureFormat.ETC_RGB4: - case TextureFormat.ETC2_RGB: - case TextureFormat.ETC2_RGBA1: - case TextureFormat.ETC2_RGBA8: - case TextureFormat.ASTC_RGB_4x4: - case TextureFormat.ASTC_RGB_5x5: - case TextureFormat.ASTC_RGB_6x6: - case TextureFormat.ASTC_RGB_8x8: - case TextureFormat.ASTC_RGB_10x10: - case TextureFormat.ASTC_RGB_12x12: - case TextureFormat.ASTC_RGBA_4x4: - case TextureFormat.ASTC_RGBA_5x5: - case TextureFormat.ASTC_RGBA_6x6: - case TextureFormat.ASTC_RGBA_8x8: - case TextureFormat.ASTC_RGBA_10x10: - case TextureFormat.ASTC_RGBA_12x12: - case TextureFormat.ETC_RGB4_3DS: - case TextureFormat.ETC_RGBA8_3DS: - return ConvertToPVR(); - case TextureFormat.RHalf: - case TextureFormat.RGHalf: - case TextureFormat.RGBAHalf: - case TextureFormat.RFloat: - case TextureFormat.RGFloat: - case TextureFormat.RGBAFloat: - case TextureFormat.BC4: - case TextureFormat.BC5: - case TextureFormat.BC6H: - case TextureFormat.BC7: - case TextureFormat.ATC_RGB4: - case TextureFormat.ATC_RGBA8: - case TextureFormat.EAC_R: - case TextureFormat.EAC_R_SIGNED: - case TextureFormat.EAC_RG: - case 
TextureFormat.EAC_RG_SIGNED: - return ConvertToKTX(); - default: - return image_data; - } - } - - private byte[] ConvertToDDS() - { - var imageBuffer = new byte[128 + image_data_size]; - dwMagic.CopyTo(imageBuffer, 0); - BitConverter.GetBytes(dwFlags).CopyTo(imageBuffer, 8); - BitConverter.GetBytes(m_Height).CopyTo(imageBuffer, 12); - BitConverter.GetBytes(m_Width).CopyTo(imageBuffer, 16); - BitConverter.GetBytes(dwPitchOrLinearSize).CopyTo(imageBuffer, 20); - BitConverter.GetBytes(dwMipMapCount).CopyTo(imageBuffer, 28); - BitConverter.GetBytes(dwSize).CopyTo(imageBuffer, 76); - BitConverter.GetBytes(dwFlags2).CopyTo(imageBuffer, 80); - BitConverter.GetBytes(dwFourCC).CopyTo(imageBuffer, 84); - BitConverter.GetBytes(dwRGBBitCount).CopyTo(imageBuffer, 88); - BitConverter.GetBytes(dwRBitMask).CopyTo(imageBuffer, 92); - BitConverter.GetBytes(dwGBitMask).CopyTo(imageBuffer, 96); - BitConverter.GetBytes(dwBBitMask).CopyTo(imageBuffer, 100); - BitConverter.GetBytes(dwABitMask).CopyTo(imageBuffer, 104); - BitConverter.GetBytes(dwCaps).CopyTo(imageBuffer, 108); - BitConverter.GetBytes(dwCaps2).CopyTo(imageBuffer, 112); - image_data.CopyTo(imageBuffer, 128); - return imageBuffer; - } - - private byte[] ConvertToPVR() - { - var mstream = new MemoryStream(); - using (var writer = new BinaryWriter(mstream)) - { - writer.Write(pvrVersion); - writer.Write(pvrFlags); - writer.Write(pvrPixelFormat); - writer.Write(pvrColourSpace); - writer.Write(pvrChannelType); - writer.Write(m_Height); - writer.Write(m_Width); - writer.Write(pvrDepth); - writer.Write(pvrNumSurfaces); - writer.Write(pvrNumFaces); - writer.Write(dwMipMapCount); - writer.Write(pvrMetaDataSize); - writer.Write(image_data); - return mstream.ToArray(); - } - } - - private byte[] ConvertToKTX() - { - var mstream = new MemoryStream(); - using (var writer = new BinaryWriter(mstream)) - { - writer.Write(KTXHeader.IDENTIFIER); - writer.Write(KTXHeader.ENDIANESS_LE); - writer.Write(glType); - writer.Write(glTypeSize); - 
writer.Write(glFormat); - writer.Write(glInternalFormat); - writer.Write(glBaseInternalFormat); - writer.Write(m_Width); - writer.Write(m_Height); - writer.Write(pixelDepth); - writer.Write(numberOfArrayElements); - writer.Write(numberOfFaces); - writer.Write(numberOfMipmapLevels); - writer.Write(bytesOfKeyValueData); - writer.Write(image_data_size); - writer.Write(image_data); - return mstream.ToArray(); - } + platform = m_Texture2D.platform; } public Bitmap ConvertToBitmap(bool flip) { if (image_data == null || image_data.Length == 0) return null; - Bitmap bitmap; + var buff = DecodeTexture2D(); + if (buff == null) + { + return null; + } + var handle = GCHandle.Alloc(buff, GCHandleType.Pinned); + var scan0 = handle.AddrOfPinnedObject(); + var bitmap = new Bitmap(m_Width, m_Height, m_Width * 4, PixelFormat.Format32bppArgb, scan0); + handle.Free(); + if (flip) + { + bitmap.RotateFlip(RotateFlipType.RotateNoneFlipY); + } + return bitmap; + } + + public byte[] DecodeTexture2D() + { + byte[] bytes = null; switch (m_TextureFormat) { - case TextureFormat.Alpha8: - case TextureFormat.ARGB4444: - case TextureFormat.RGB24: - case TextureFormat.RGBA32: - case TextureFormat.ARGB32: - case TextureFormat.R16: - case TextureFormat.RGBA4444: - case TextureFormat.BGRA32: - case TextureFormat.RG16: - case TextureFormat.R8: - bitmap = BGRA32ToBitmap(); + case TextureFormat.Alpha8: //test pass + bytes = DecodeAlpha8(); break; - case TextureFormat.RGB565: - bitmap = RGB565ToBitmap(); + case TextureFormat.ARGB4444: //test pass + SwapBytesForXbox(); + bytes = DecodeARGB4444(); break; - case TextureFormat.YUY2: - case TextureFormat.PVRTC_RGB2: - case TextureFormat.PVRTC_RGBA2: - case TextureFormat.PVRTC_RGB4: - case TextureFormat.PVRTC_RGBA4: - case TextureFormat.ETC_RGB4: - case TextureFormat.ETC2_RGB: - case TextureFormat.ETC2_RGBA1: - case TextureFormat.ETC2_RGBA8: - case TextureFormat.ETC_RGB4_3DS: - case TextureFormat.ETC_RGBA8_3DS: - bitmap = PVRToBitmap(ConvertToPVR()); + case 
TextureFormat.RGB24: //test pass + bytes = DecodeRGB24(); + break; + case TextureFormat.RGBA32: //test pass + bytes = DecodeRGBA32(); + break; + case TextureFormat.ARGB32: //test pass + bytes = DecodeARGB32(); + break; + case TextureFormat.RGB565: //test pass + SwapBytesForXbox(); + bytes = DecodeRGB565(); + break; + case TextureFormat.R16: //test pass + bytes = DecodeR16(); + break; + case TextureFormat.DXT1: //test pass + SwapBytesForXbox(); + bytes = DecodeDXT1(); + break; + case TextureFormat.DXT5: //test pass + SwapBytesForXbox(); + bytes = DecodeDXT5(); + break; + case TextureFormat.RGBA4444: //test pass + bytes = DecodeRGBA4444(); + break; + case TextureFormat.BGRA32: //test pass + bytes = DecodeBGRA32(); break; - case TextureFormat.DXT1: - case TextureFormat.DXT5: case TextureFormat.RHalf: + bytes = DecodeRHalf(); + break; case TextureFormat.RGHalf: - case TextureFormat.RGBAHalf: + bytes = DecodeRGHalf(); + break; + case TextureFormat.RGBAHalf: //test pass + bytes = DecodeRGBAHalf(); + break; case TextureFormat.RFloat: + bytes = DecodeRFloat(); + break; case TextureFormat.RGFloat: + bytes = DecodeRGFloat(); + break; case TextureFormat.RGBAFloat: - case TextureFormat.RGB9e5Float: - case TextureFormat.ATC_RGB4: - case TextureFormat.ATC_RGBA8: - case TextureFormat.EAC_R: + bytes = DecodeRGBAFloat(); + break; + case TextureFormat.YUY2: //test pass + bytes = DecodeYUY2(); + break; + case TextureFormat.RGB9e5Float: //test pass + bytes = DecodeRGB9e5Float(); + break; + case TextureFormat.BC4: //test pass + bytes = DecodeBC4(); + break; + case TextureFormat.BC5: //test pass + bytes = DecodeBC5(); + break; + case TextureFormat.BC6H: //test pass + bytes = DecodeBC6H(); + break; + case TextureFormat.BC7: //test pass + bytes = DecodeBC7(); + break; + case TextureFormat.DXT1Crunched: //test pass + if (UnpackCrunch()) + { + bytes = DecodeDXT1(); + } + break; + case TextureFormat.DXT5Crunched: //test pass + if (UnpackCrunch()) + { + bytes = DecodeDXT5(); + } + break; + 
case TextureFormat.PVRTC_RGB2: //test pass + case TextureFormat.PVRTC_RGBA2: //test pass + bytes = DecodePVRTC(true); + break; + case TextureFormat.PVRTC_RGB4: //test pass + case TextureFormat.PVRTC_RGBA4: //test pass + bytes = DecodePVRTC(false); + break; + case TextureFormat.ETC_RGB4: //test pass + case TextureFormat.ETC_RGB4_3DS: + bytes = DecodeETC1(); + break; + case TextureFormat.ATC_RGB4: //test pass + bytes = DecodeATCRGB4(); + break; + case TextureFormat.ATC_RGBA8: //test pass + bytes = DecodeATCRGBA8(); + break; + case TextureFormat.EAC_R: //test pass + bytes = DecodeEACR(); + break; case TextureFormat.EAC_R_SIGNED: - case TextureFormat.EAC_RG: + bytes = DecodeEACRSigned(); + break; + case TextureFormat.EAC_RG: //test pass + bytes = DecodeEACRG(); + break; case TextureFormat.EAC_RG_SIGNED: - bitmap = TextureConverter(); + bytes = DecodeEACRGSigned(); break; - case TextureFormat.BC4: - case TextureFormat.BC5: - case TextureFormat.BC6H: - case TextureFormat.BC7: - bitmap = TexgenPackDecode(); + case TextureFormat.ETC2_RGB: //test pass + bytes = DecodeETC2(); break; - case TextureFormat.DXT1Crunched: - case TextureFormat.DXT5Crunched: - DecompressCRN(); - bitmap = TextureConverter(); + case TextureFormat.ETC2_RGBA1: //test pass + bytes = DecodeETC2A1(); break; - case TextureFormat.ETC_RGB4Crunched: - case TextureFormat.ETC2_RGBA8Crunched: - DecompressCRN(); - bitmap = PVRToBitmap(ConvertToPVR()); + case TextureFormat.ETC2_RGBA8: //test pass + case TextureFormat.ETC_RGBA8_3DS: + bytes = DecodeETC2A8(); break; - case TextureFormat.ASTC_RGB_4x4: - case TextureFormat.ASTC_RGB_5x5: - case TextureFormat.ASTC_RGB_6x6: - case TextureFormat.ASTC_RGB_8x8: - case TextureFormat.ASTC_RGB_10x10: - case TextureFormat.ASTC_RGB_12x12: - case TextureFormat.ASTC_RGBA_4x4: - case TextureFormat.ASTC_RGBA_5x5: - case TextureFormat.ASTC_RGBA_6x6: - case TextureFormat.ASTC_RGBA_8x8: - case TextureFormat.ASTC_RGBA_10x10: - case TextureFormat.ASTC_RGBA_12x12: - bitmap = DecodeASTC(); 
+ case TextureFormat.ASTC_RGB_4x4: //test pass + case TextureFormat.ASTC_RGBA_4x4: //test pass + case TextureFormat.ASTC_HDR_4x4: //test pass + bytes = DecodeASTC(4); + break; + case TextureFormat.ASTC_RGB_5x5: //test pass + case TextureFormat.ASTC_RGBA_5x5: //test pass + case TextureFormat.ASTC_HDR_5x5: //test pass + bytes = DecodeASTC(5); + break; + case TextureFormat.ASTC_RGB_6x6: //test pass + case TextureFormat.ASTC_RGBA_6x6: //test pass + case TextureFormat.ASTC_HDR_6x6: //test pass + bytes = DecodeASTC(6); + break; + case TextureFormat.ASTC_RGB_8x8: //test pass + case TextureFormat.ASTC_RGBA_8x8: //test pass + case TextureFormat.ASTC_HDR_8x8: //test pass + bytes = DecodeASTC(8); + break; + case TextureFormat.ASTC_RGB_10x10: //test pass + case TextureFormat.ASTC_RGBA_10x10: //test pass + case TextureFormat.ASTC_HDR_10x10: //test pass + bytes = DecodeASTC(10); + break; + case TextureFormat.ASTC_RGB_12x12: //test pass + case TextureFormat.ASTC_RGBA_12x12: //test pass + case TextureFormat.ASTC_HDR_12x12: //test pass + bytes = DecodeASTC(12); + break; + case TextureFormat.RG16: //test pass + bytes = DecodeRG16(); + break; + case TextureFormat.R8: //test pass + bytes = DecodeR8(); + break; + case TextureFormat.ETC_RGB4Crunched: //test pass + if (UnpackCrunch()) + { + bytes = DecodeETC1(); + } + break; + case TextureFormat.ETC2_RGBA8Crunched: //test pass + if (UnpackCrunch()) + { + bytes = DecodeETC2A8(); + } break; - default: - return null; } - if (bitmap != null && flip) - bitmap.RotateFlip(RotateFlipType.RotateNoneFlipY); - return bitmap; + return bytes; } - private Bitmap BGRA32ToBitmap() + private void SwapBytesForXbox() { - var hObject = GCHandle.Alloc(image_data, GCHandleType.Pinned); - var pObject = hObject.AddrOfPinnedObject(); - var bitmap = new Bitmap(m_Width, m_Height, m_Width * 4, PixelFormat.Format32bppArgb, pObject); - hObject.Free(); - return bitmap; - } - - private Bitmap RGB565ToBitmap() - { - //stride = m_Width * 2 + m_Width * 2 % 4 - //所以m_Width 
* 2不为4的倍数时,需要在每行补上相应的像素 - byte[] buff; - var padding = m_Width * 2 % 4; - var stride = m_Width * 2 + padding; - if (padding != 0) + if (platform == BuildTarget.XBOX360) { - buff = new byte[stride * m_Height]; - for (int i = 0; i < m_Height; i++) + for (var i = 0; i < image_data_size / 2; i++) { - Buffer.BlockCopy(image_data, i * m_Width * 2, buff, i * stride, m_Width * 2); + var b = image_data[i * 2]; + image_data[i * 2] = image_data[i * 2 + 1]; + image_data[i * 2 + 1] = b; } } - else - { - buff = image_data; - } - var gch = GCHandle.Alloc(buff, GCHandleType.Pinned); - var imagePtr = gch.AddrOfPinnedObject(); - var bitmap = new Bitmap(m_Width, m_Height, stride, PixelFormat.Format16bppRgb565, imagePtr); - bitmap = bitmap.Clone(new Rectangle(0, 0, bitmap.Width, bitmap.Height), PixelFormat.Format32bppArgb); - gch.Free(); - return bitmap; } - private Bitmap PVRToBitmap(byte[] pvrData) + private byte[] DecodeAlpha8() { - var imageBuff = new byte[m_Width * m_Height * 4]; - var gch = GCHandle.Alloc(imageBuff, GCHandleType.Pinned); - var imagePtr = gch.AddrOfPinnedObject(); - if (!NativeMethods.DecompressPVR(pvrData, imagePtr)) + var buff = Enumerable.Repeat(0xFF, m_Width * m_Height * 4).ToArray(); + for (var i = 0; i < m_Width * m_Height; i++) + { + buff[i * 4 + 3] = image_data[i]; + } + return buff; + } + + private byte[] DecodeARGB4444() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < m_Width * m_Height; i++) + { + var pixelNew = new byte[4]; + var pixelOldShort = BitConverter.ToUInt16(image_data, i * 2); + pixelNew[0] = (byte)(pixelOldShort & 0x000f); + pixelNew[1] = (byte)((pixelOldShort & 0x00f0) >> 4); + pixelNew[2] = (byte)((pixelOldShort & 0x0f00) >> 8); + pixelNew[3] = (byte)((pixelOldShort & 0xf000) >> 12); + for (var j = 0; j < 4; j++) + pixelNew[j] = (byte)((pixelNew[j] << 4) | pixelNew[j]); + pixelNew.CopyTo(buff, i * 4); + } + return buff; + } + + private byte[] DecodeRGB24() + { + var buff = new byte[m_Width * m_Height * 4]; + for 
(var i = 0; i < m_Width * m_Height; i++) + { + buff[i * 4] = image_data[i * 3 + 2]; + buff[i * 4 + 1] = image_data[i * 3 + 1]; + buff[i * 4 + 2] = image_data[i * 3 + 0]; + buff[i * 4 + 3] = 255; + } + return buff; + } + + private byte[] DecodeRGBA32() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = image_data[i + 2]; + buff[i + 1] = image_data[i + 1]; + buff[i + 2] = image_data[i + 0]; + buff[i + 3] = image_data[i + 3]; + } + return buff; + } + + private byte[] DecodeARGB32() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = image_data[i + 3]; + buff[i + 1] = image_data[i + 2]; + buff[i + 2] = image_data[i + 1]; + buff[i + 3] = image_data[i + 0]; + } + return buff; + } + + private byte[] DecodeRGB565() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < m_Width * m_Height; i++) + { + var p = BitConverter.ToUInt16(image_data, i * 2); + buff[i * 4] = (byte)((p << 3) | (p >> 2 & 7)); + buff[i * 4 + 1] = (byte)((p >> 3 & 0xfc) | (p >> 9 & 3)); + buff[i * 4 + 2] = (byte)((p >> 8 & 0xf8) | (p >> 13)); + buff[i * 4 + 3] = 255; + } + return buff; + } + + private byte[] DecodeR16() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < m_Width * m_Height; i++) + { + buff[i * 4 + 2] = image_data[i * 2 + 1]; //r + buff[i * 4 + 3] = 255; //a + } + return buff; + } + + private byte[] DecodeDXT1() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeDXT1(image_data, m_Width, m_Height, buff)) { - gch.Free(); return null; } - var bitmap = new Bitmap(m_Width, m_Height, m_Width * 4, PixelFormat.Format32bppArgb, imagePtr); - gch.Free(); - return bitmap; + return buff; } - private Bitmap TextureConverter() + private byte[] DecodeDXT5() { - var imageBuff = new byte[m_Width * m_Height * 4]; - var gch = GCHandle.Alloc(imageBuff, GCHandleType.Pinned); - var imagePtr = gch.AddrOfPinnedObject(); - var 
fixAlpha = glBaseInternalFormat == KTXHeader.GL_RED || glBaseInternalFormat == KTXHeader.GL_RG; - if (!NativeMethods.Ponvert(image_data, image_data_size, m_Width, m_Height, (int)q_format, fixAlpha, imagePtr)) + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeDXT5(image_data, m_Width, m_Height, buff)) { - gch.Free(); return null; } - var bitmap = new Bitmap(m_Width, m_Height, m_Width * 4, PixelFormat.Format32bppArgb, imagePtr); - gch.Free(); - return bitmap; + return buff; } - private void DecompressCRN() + private byte[] DecodeRGBA4444() { - IntPtr uncompressedData; - int uncompressedSize; - bool result; + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < m_Width * m_Height; i++) + { + var pixelNew = new byte[4]; + var pixelOldShort = BitConverter.ToUInt16(image_data, i * 2); + pixelNew[0] = (byte)((pixelOldShort & 0x00f0) >> 4); + pixelNew[1] = (byte)((pixelOldShort & 0x0f00) >> 8); + pixelNew[2] = (byte)((pixelOldShort & 0xf000) >> 12); + pixelNew[3] = (byte)(pixelOldShort & 0x000f); + for (var j = 0; j < 4; j++) + pixelNew[j] = (byte)((pixelNew[j] << 4) | pixelNew[j]); + pixelNew.CopyTo(buff, i * 4); + } + return buff; + } + + private byte[] DecodeBGRA32() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = image_data[i]; + buff[i + 1] = image_data[i + 1]; + buff[i + 2] = image_data[i + 2]; + buff[i + 3] = image_data[i + 3]; + } + return buff; + } + + private byte[] DecodeRHalf() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = 0; + buff[i + 1] = 0; + buff[i + 2] = (byte)Math.Ceiling(Half.ToHalf(image_data, i / 2) * 255f); + buff[i + 3] = 255; + } + return buff; + } + + private byte[] DecodeRGHalf() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = 0; + buff[i + 1] = (byte)Math.Ceiling(Half.ToHalf(image_data, i + 2) * 255f); + buff[i + 2] = 
(byte)Math.Ceiling(Half.ToHalf(image_data, i) * 255f); + buff[i + 3] = 255; + } + return buff; + } + + private byte[] DecodeRGBAHalf() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = (byte)Math.Ceiling(Half.ToHalf(image_data, i * 2 + 4) * 255f); + buff[i + 1] = (byte)Math.Ceiling(Half.ToHalf(image_data, i * 2 + 2) * 255f); + buff[i + 2] = (byte)Math.Ceiling(Half.ToHalf(image_data, i * 2) * 255f); + buff[i + 3] = (byte)Math.Ceiling(Half.ToHalf(image_data, i * 2 + 6) * 255f); + } + return buff; + } + + private byte[] DecodeRFloat() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = 0; + buff[i + 1] = 0; + buff[i + 2] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i) * 255f); + buff[i + 3] = 255; + } + return buff; + } + + private byte[] DecodeRGFloat() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = 0; + buff[i + 1] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i * 2 + 4) * 255f); + buff[i + 2] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i * 2) * 255f); + buff[i + 3] = 255; + } + return buff; + } + + private byte[] DecodeRGBAFloat() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + buff[i] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i * 4 + 8) * 255f); + buff[i + 1] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i * 4 + 4) * 255f); + buff[i + 2] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i * 4) * 255f); + buff[i + 3] = (byte)Math.Ceiling(BitConverter.ToSingle(image_data, i * 4 + 12) * 255f); + } + return buff; + } + + private static byte ClampByte(int x) + { + return (byte)(byte.MaxValue < x ? byte.MaxValue : (x > byte.MinValue ? 
x : byte.MinValue)); + } + + private byte[] DecodeYUY2() + { + var buff = new byte[m_Width * m_Height * 4]; + int p = 0; + int o = 0; + int halfWidth = m_Width / 2; + for (int j = 0; j < m_Height; j++) + { + for (int i = 0; i < halfWidth; ++i) + { + int y0 = image_data[p++]; + int u0 = image_data[p++]; + int y1 = image_data[p++]; + int v0 = image_data[p++]; + int c = y0 - 16; + int d = u0 - 128; + int e = v0 - 128; + buff[o++] = ClampByte((298 * c + 516 * d + 128) >> 8); // b + buff[o++] = ClampByte((298 * c - 100 * d - 208 * e + 128) >> 8); // g + buff[o++] = ClampByte((298 * c + 409 * e + 128) >> 8); // r + buff[o++] = 255; + c = y1 - 16; + buff[o++] = ClampByte((298 * c + 516 * d + 128) >> 8); // b + buff[o++] = ClampByte((298 * c - 100 * d - 208 * e + 128) >> 8); // g + buff[o++] = ClampByte((298 * c + 409 * e + 128) >> 8); // r + buff[o++] = 255; + } + } + return buff; + } + + private byte[] DecodeRGB9e5Float() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < buff.Length; i += 4) + { + var n = BitConverter.ToInt32(image_data, i); + var scale = n >> 27 & 0x1f; + var scalef = Math.Pow(2, scale - 24); + var b = n >> 18 & 0x1ff; + var g = n >> 9 & 0x1ff; + var r = n & 0x1ff; + buff[i] = (byte)Math.Ceiling(b * scalef * 255f); + buff[i + 1] = (byte)Math.Ceiling(g * scalef * 255f); + buff[i + 2] = (byte)Math.Ceiling(r * scalef * 255f); + buff[i + 3] = 255; + } + return buff; + } + + private byte[] DecodeBC4() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeBC4(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeBC5() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeBC5(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeBC6H() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeBC6(image_data, m_Width, m_Height, buff)) + { + return null; + } + return 
buff; + } + + private byte[] DecodeBC7() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeBC7(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodePVRTC(bool is2bpp) + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodePVRTC(image_data, m_Width, m_Height, buff, is2bpp)) + { + return null; + } + return buff; + } + + private byte[] DecodeETC1() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeETC1(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeATCRGB4() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeATCRGB4(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeATCRGBA8() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeATCRGBA8(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeEACR() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeEACR(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeEACRSigned() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeEACRSigned(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeEACRG() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeEACRG(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeEACRGSigned() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeEACRGSigned(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeETC2() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeETC2(image_data, m_Width, m_Height, buff)) + { + 
return null; + } + return buff; + } + + private byte[] DecodeETC2A1() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeETC2A1(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeETC2A8() + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeETC2A8(image_data, m_Width, m_Height, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeASTC(int blocksize) + { + var buff = new byte[m_Width * m_Height * 4]; + if (!TextureDecoder.DecodeASTC(image_data, m_Width, m_Height, blocksize, blocksize, buff)) + { + return null; + } + return buff; + } + + private byte[] DecodeRG16() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < m_Width * m_Height; i += 2) + { + buff[i * 2 + 1] = image_data[i + 1];//G + buff[i * 2 + 2] = image_data[i];//R + buff[i * 2 + 3] = 255;//A + } + return buff; + } + + private byte[] DecodeR8() + { + var buff = new byte[m_Width * m_Height * 4]; + for (var i = 0; i < m_Width * m_Height; i++) + { + buff[i * 4 + 2] = image_data[i];//R + buff[i * 4 + 3] = 255;//A + } + return buff; + } + + private bool UnpackCrunch() + { + byte[] result; if (version[0] > 2017 || (version[0] == 2017 && version[1] >= 3) //2017.3 and up || m_TextureFormat == TextureFormat.ETC_RGB4Crunched || m_TextureFormat == TextureFormat.ETC2_RGBA8Crunched) { - result = NativeMethods.DecompressUnityCRN(image_data, image_data_size, out uncompressedData, out uncompressedSize); + result = TextureDecoder.UnpackUnityCrunch(image_data); } else { - result = NativeMethods.DecompressCRN(image_data, image_data_size, out uncompressedData, out uncompressedSize); + result = TextureDecoder.UnpackCrunch(image_data); } - - if (result) + if (result != null) { - var uncompressedBytes = new byte[uncompressedSize]; - Marshal.Copy(uncompressedData, uncompressedBytes, 0, uncompressedSize); - Marshal.FreeHGlobal(uncompressedData); - image_data = uncompressedBytes; - 
image_data_size = uncompressedSize; + image_data = result; + image_data_size = result.Length; + return true; } + return false; } - - private Bitmap TexgenPackDecode() - { - var imageBuff = new byte[m_Width * m_Height * 4]; - var gch = GCHandle.Alloc(imageBuff, GCHandleType.Pinned); - var imagePtr = gch.AddrOfPinnedObject(); - NativeMethods.TexgenPackDecode(image_data, (int)texturetype, m_Width, m_Height, imagePtr); - var bitmap = new Bitmap(m_Width, m_Height, m_Width * 4, PixelFormat.Format32bppArgb, imagePtr); - gch.Free(); - return bitmap; - } - - private Bitmap DecodeASTC() - { - var imageBuff = new byte[m_Width * m_Height * 4]; - var gch = GCHandle.Alloc(imageBuff, GCHandleType.Pinned); - var imagePtr = gch.AddrOfPinnedObject(); - if (!NativeMethods.DecodeASTC(image_data, m_Width, m_Height, astcBlockWidth, astcBlockHeight, imagePtr)) - { - gch.Free(); - return null; - } - var bitmap = new Bitmap(m_Width, m_Height, m_Width * 4, PixelFormat.Format32bppArgb, imagePtr); - gch.Free(); - return bitmap; - } - } - - internal static class NativeMethods - { - [DllImport("PVRTexLibWrapper.dll", CallingConvention = CallingConvention.Cdecl)] - public static extern bool DecompressPVR(byte[] data, IntPtr image); - - [DllImport("TextureConverterWrapper.dll", CallingConvention = CallingConvention.Cdecl)] - public static extern bool Ponvert(byte[] data, int dataSize, int width, int height, int type, bool fixAlpha, IntPtr image); - - [DllImport("crunch.dll", CallingConvention = CallingConvention.Cdecl)] - public static extern bool DecompressCRN(byte[] data, int dataSize, out IntPtr uncompressedData, out int uncompressedSize); - - [DllImport("crunchunity.dll", CallingConvention = CallingConvention.Cdecl)] - public static extern bool DecompressUnityCRN(byte[] data, int dataSize, out IntPtr uncompressedData, out int uncompressedSize); - - [DllImport("texgenpack.dll", CallingConvention = CallingConvention.Cdecl)] - public static extern void TexgenPackDecode(byte[] data, int 
textureType, int width, int height, IntPtr image); - - [DllImport("astc.dll", CallingConvention = CallingConvention.Cdecl)] - public static extern bool DecodeASTC(byte[] data, int width, int height, int blockwidth, int blockheight, IntPtr image); - } - - public static class KTXHeader - { - public static byte[] IDENTIFIER = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; - public static byte[] ENDIANESS_LE = { 1, 2, 3, 4 }; - - // constants for glInternalFormat - public static int GL_ETC1_RGB8_OES = 0x8D64; - - public static int GL_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00; - public static int GL_COMPRESSED_RGB_PVRTC_2BPPV1_IMG = 0x8C01; - public static int GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02; - public static int GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG = 0x8C03; - - public static int GL_ATC_RGB_AMD = 0x8C92; - public static int GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE; - - public static int GL_COMPRESSED_RGB8_ETC2 = 0x9274; - public static int GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 = 0x9276; - public static int GL_COMPRESSED_RGBA8_ETC2_EAC = 0x9278; - public static int GL_COMPRESSED_R11_EAC = 0x9270; - public static int GL_COMPRESSED_SIGNED_R11_EAC = 0x9271; - public static int GL_COMPRESSED_RG11_EAC = 0x9272; - public static int GL_COMPRESSED_SIGNED_RG11_EAC = 0x9273; - - public static int GL_COMPRESSED_RED_RGTC1 = 0x8DBB; - public static int GL_COMPRESSED_RG_RGTC2 = 0x8DBD; - public static int GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F; - public static int GL_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C; - - public static int GL_R16F = 0x822D; - public static int GL_RG16F = 0x822F; - public static int GL_RGBA16F = 0x881A; - public static int GL_R32F = 0x822E; - public static int GL_RG32F = 0x8230; - public static int GL_RGBA32F = 0x8814; - - // constants for glBaseInternalFormat - public static int GL_RED = 0x1903; - public static int GL_RGB = 0x1907; - public static int GL_RGBA = 0x1908; - public static int GL_RG = 0x8227; - } - 
- //from TextureConverter.h - public enum QFORMAT - { - // General formats - Q_FORMAT_RGBA_8UI = 1, - Q_FORMAT_RGBA_8I, - Q_FORMAT_RGB5_A1UI, - Q_FORMAT_RGBA_4444, - Q_FORMAT_RGBA_16UI, - Q_FORMAT_RGBA_16I, - Q_FORMAT_RGBA_32UI, - Q_FORMAT_RGBA_32I, - - Q_FORMAT_PALETTE_8_RGBA_8888, - Q_FORMAT_PALETTE_8_RGBA_5551, - Q_FORMAT_PALETTE_8_RGBA_4444, - Q_FORMAT_PALETTE_4_RGBA_8888, - Q_FORMAT_PALETTE_4_RGBA_5551, - Q_FORMAT_PALETTE_4_RGBA_4444, - Q_FORMAT_PALETTE_1_RGBA_8888, - Q_FORMAT_PALETTE_8_RGB_888, - Q_FORMAT_PALETTE_8_RGB_565, - Q_FORMAT_PALETTE_4_RGB_888, - Q_FORMAT_PALETTE_4_RGB_565, - - Q_FORMAT_R2_GBA10UI, - Q_FORMAT_RGB10_A2UI, - Q_FORMAT_RGB10_A2I, - Q_FORMAT_RGBA_F, - Q_FORMAT_RGBA_HF, - - Q_FORMAT_RGB9_E5, // Last five bits are exponent bits (Read following section in GLES3 spec: "3.8.17 Shared Exponent Texture Color Conversion") - Q_FORMAT_RGB_8UI, - Q_FORMAT_RGB_8I, - Q_FORMAT_RGB_565, - Q_FORMAT_RGB_16UI, - Q_FORMAT_RGB_16I, - Q_FORMAT_RGB_32UI, - Q_FORMAT_RGB_32I, - - Q_FORMAT_RGB_F, - Q_FORMAT_RGB_HF, - Q_FORMAT_RGB_11_11_10_F, - - Q_FORMAT_RG_F, - Q_FORMAT_RG_HF, - Q_FORMAT_RG_32UI, - Q_FORMAT_RG_32I, - Q_FORMAT_RG_16I, - Q_FORMAT_RG_16UI, - Q_FORMAT_RG_8I, - Q_FORMAT_RG_8UI, - Q_FORMAT_RG_S88, - - Q_FORMAT_R_32UI, - Q_FORMAT_R_32I, - Q_FORMAT_R_F, - Q_FORMAT_R_16F, - Q_FORMAT_R_16I, - Q_FORMAT_R_16UI, - Q_FORMAT_R_8I, - Q_FORMAT_R_8UI, - - Q_FORMAT_LUMINANCE_ALPHA_88, - Q_FORMAT_LUMINANCE_8, - Q_FORMAT_ALPHA_8, - - Q_FORMAT_LUMINANCE_ALPHA_F, - Q_FORMAT_LUMINANCE_F, - Q_FORMAT_ALPHA_F, - Q_FORMAT_LUMINANCE_ALPHA_HF, - Q_FORMAT_LUMINANCE_HF, - Q_FORMAT_ALPHA_HF, - Q_FORMAT_DEPTH_16, - Q_FORMAT_DEPTH_24, - Q_FORMAT_DEPTH_24_STENCIL_8, - Q_FORMAT_DEPTH_32, - - Q_FORMAT_BGR_565, - Q_FORMAT_BGRA_8888, - Q_FORMAT_BGRA_5551, - Q_FORMAT_BGRX_8888, - Q_FORMAT_BGRA_4444, - // Compressed formats - Q_FORMAT_ATITC_RGBA, - Q_FORMAT_ATC_RGBA_EXPLICIT_ALPHA = Q_FORMAT_ATITC_RGBA, - Q_FORMAT_ATITC_RGB, - Q_FORMAT_ATC_RGB = Q_FORMAT_ATITC_RGB, - 
Q_FORMAT_ATC_RGBA_INTERPOLATED_ALPHA, - Q_FORMAT_ETC1_RGB8, - Q_FORMAT_3DC_X, - Q_FORMAT_3DC_XY, - - Q_FORMAT_ETC2_RGB8, - Q_FORMAT_ETC2_RGBA8, - Q_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1, - Q_FORMAT_ETC2_SRGB8, - Q_FORMAT_ETC2_SRGB8_ALPHA8, - Q_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1, - Q_FORMAT_EAC_R_SIGNED, - Q_FORMAT_EAC_R_UNSIGNED, - Q_FORMAT_EAC_RG_SIGNED, - Q_FORMAT_EAC_RG_UNSIGNED, - - Q_FORMAT_S3TC_DXT1_RGB, - Q_FORMAT_S3TC_DXT1_RGBA, - Q_FORMAT_S3TC_DXT3_RGBA, - Q_FORMAT_S3TC_DXT5_RGBA, - - // YUV formats - Q_FORMAT_AYUV_32, - Q_FORMAT_I444_24, - Q_FORMAT_YUYV_16, - Q_FORMAT_UYVY_16, - Q_FORMAT_I420_12, - Q_FORMAT_YV12_12, - Q_FORMAT_NV21_12, - Q_FORMAT_NV12_12, - - // ASTC Format - Q_FORMAT_ASTC_8, - Q_FORMAT_ASTC_16, - }; - - public enum texgenpack_texturetype - { - RGTC1, - RGTC2, - BPTC_FLOAT, - BPTC } } diff --git a/AssetStudioUtility/Texture2DExtensions.cs b/AssetStudioUtility/Texture2DExtensions.cs new file mode 100644 index 0000000..132ca91 --- /dev/null +++ b/AssetStudioUtility/Texture2DExtensions.cs @@ -0,0 +1,13 @@ +using System.Drawing; + +namespace AssetStudio +{ + public static class Texture2DExtensions + { + public static Bitmap ConvertToBitmap(this Texture2D m_Texture2D, bool flip) + { + var converter = new Texture2DConverter(m_Texture2D); + return converter.ConvertToBitmap(flip); + } + } +} diff --git a/Texture2DDecoder/AssemblyInfo.cpp b/Texture2DDecoder/AssemblyInfo.cpp new file mode 100644 index 0000000..8a147b6 --- /dev/null +++ b/Texture2DDecoder/AssemblyInfo.cpp @@ -0,0 +1,20 @@ +using namespace System; +using namespace System::Reflection; +using namespace System::Runtime::CompilerServices; +using namespace System::Runtime::InteropServices; +using namespace System::Security::Permissions; + +[assembly:AssemblyTitleAttribute(L"Texture2DDecoder")]; +[assembly:AssemblyDescriptionAttribute(L"")]; +[assembly:AssemblyConfigurationAttribute(L"")]; +[assembly:AssemblyCompanyAttribute(L"")]; 
+[assembly:AssemblyProductAttribute(L"Texture2DDecoder")]; +[assembly:AssemblyCopyrightAttribute(L"Copyright © Perfare 2020")]; +[assembly:AssemblyTrademarkAttribute(L"")]; +[assembly:AssemblyCultureAttribute(L"")]; + +[assembly:AssemblyVersionAttribute("1.0.*")]; + +[assembly:ComVisible(false)]; + +[assembly:CLSCompliantAttribute(true)]; diff --git a/Texture2DDecoder/Texture2DDecoder.cpp b/Texture2DDecoder/Texture2DDecoder.cpp new file mode 100644 index 0000000..02a41aa --- /dev/null +++ b/Texture2DDecoder/Texture2DDecoder.cpp @@ -0,0 +1,148 @@ +#include +#include "Texture2DDecoder.h" +#include "bcn.h" +#include "pvrtc.h" +#include "etc.h" +#include "atc.h" +#include "astc.h" +#include "crunch.h" +#include "unitycrunch.h" + +namespace Texture2DDecoder { + bool TextureDecoder::DecodeDXT1(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_bc1(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeDXT5(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_bc3(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodePVRTC(array^ data, long w, long h, array^ image, bool is2bpp) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_pvrtc(dataPin, w, h, reinterpret_cast(imagePin), is2bpp ? 
1 : 0); + } + + bool TextureDecoder::DecodeETC1(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_etc1(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeETC2(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_etc2(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeETC2A1(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_etc2a1(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeETC2A8(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_etc2a8(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeEACR(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_eacr(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeEACRSigned(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_eacr_signed(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeEACRG(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_eacrg(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeEACRGSigned(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_eacrg_signed(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeBC4(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_bc4(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeBC5(array^ 
data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_bc5(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeBC6(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_bc6(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeBC7(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_bc7(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeATCRGB4(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_atc_rgb4(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeATCRGBA8(array^ data, long w, long h, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_atc_rgba8(dataPin, w, h, reinterpret_cast(imagePin)); + } + + bool TextureDecoder::DecodeASTC(array^ data, long w, long h, int bw, int bh, array^ image) { + pin_ptr dataPin = &data[0]; + pin_ptr imagePin = &image[0]; + return decode_astc(dataPin, w, h, bw, bh, reinterpret_cast(imagePin)); + } + + array^ TextureDecoder::UnpackCrunch(array^ data) { + pin_ptr dataPin = &data[0]; + void* ret; + uint32_t retSize; + if (!crunch_unpack_level(dataPin, data->Length, 0, &ret, &retSize)) { + return nullptr; + } + auto buff = gcnew array(retSize); + pin_ptr buffPin = &buff[0]; + memcpy(buffPin, ret, retSize); + delete ret; + return buff; + } + + array^ TextureDecoder::UnpackUnityCrunch(array^ data) { + pin_ptr dataPin = &data[0]; + void* ret; + uint32_t retSize; + if (!unity_crunch_unpack_level(dataPin, data->Length, 0, &ret, &retSize)) { + return nullptr; + } + auto buff = gcnew array(retSize); + pin_ptr buffPin = &buff[0]; + memcpy(buffPin, ret, retSize); + delete ret; + return buff; + } +} + diff --git 
a/Texture2DDecoder/Texture2DDecoder.h b/Texture2DDecoder/Texture2DDecoder.h new file mode 100644 index 0000000..030d2e6 --- /dev/null +++ b/Texture2DDecoder/Texture2DDecoder.h @@ -0,0 +1,30 @@ +#pragma once + +using namespace System; + +namespace Texture2DDecoder { + public ref class TextureDecoder + { + public: + static bool DecodeDXT1(array^ data, long w, long h, array^ image); + static bool DecodeDXT5(array^ data, long w, long h, array^ image); + static bool DecodePVRTC(array^ data, long w, long h, array^ image, bool is2bpp); + static bool DecodeETC1(array^ data, long w, long h, array^ image); + static bool DecodeETC2(array^ data, long w, long h, array^ image); + static bool DecodeETC2A1(array^ data, long w, long h, array^ image); + static bool DecodeETC2A8(array^ data, long w, long h, array^ image); + static bool DecodeEACR(array^ data, long w, long h, array^ image); + static bool DecodeEACRSigned(array^ data, long w, long h, array^ image); + static bool DecodeEACRG(array^ data, long w, long h, array^ image); + static bool DecodeEACRGSigned(array^ data, long w, long h, array^ image); + static bool DecodeBC4(array^ data, long w, long h, array^ image); + static bool DecodeBC5(array^ data, long w, long h, array^ image); + static bool DecodeBC6(array^ data, long w, long h, array^ image); + static bool DecodeBC7(array^ data, long w, long h, array^ image); + static bool DecodeATCRGB4(array^ data, long w, long h, array^ image); + static bool DecodeATCRGBA8(array^ data, long w, long h, array^ image); + static bool DecodeASTC(array^ data, long w, long h, int bw, int bh, array^ image); + static array^ UnpackCrunch(array^ data); + static array^ UnpackUnityCrunch(array^ data); + }; +} \ No newline at end of file diff --git a/Texture2DDecoder/Texture2DDecoder.vcxproj b/Texture2DDecoder/Texture2DDecoder.vcxproj new file mode 100644 index 0000000..632ab7b --- /dev/null +++ b/Texture2DDecoder/Texture2DDecoder.vcxproj @@ -0,0 +1,147 @@ + + + + + Debug + Win32 + + + Release + 
Win32 + + + Debug + x64 + + + Release + x64 + + + + 16.0 + {57CFF625-57AB-424A-9B6B-B5ED01282E92} + v4.0 + ManagedCProj + Texture2DDecoder + 10.0 + + + + DynamicLibrary + true + v142 + true + Unicode + + + DynamicLibrary + false + v142 + true + Unicode + + + DynamicLibrary + true + v142 + true + Unicode + + + DynamicLibrary + false + v142 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + Level3 + _DEBUG;%(PreprocessorDefinitions) + + + + + + + + Level3 + WIN32;_DEBUG;%(PreprocessorDefinitions) + + + + + + + + Level3 + WIN32;NDEBUG;%(PreprocessorDefinitions) + + + + + + + + Level3 + NDEBUG;%(PreprocessorDefinitions) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Texture2DDecoder/Texture2DDecoder.vcxproj.filters b/Texture2DDecoder/Texture2DDecoder.vcxproj.filters new file mode 100644 index 0000000..6ce1374 --- /dev/null +++ b/Texture2DDecoder/Texture2DDecoder.vcxproj.filters @@ -0,0 +1,98 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + \ No newline at end of file diff --git a/Texture2DDecoder/astc.cpp b/Texture2DDecoder/astc.cpp new file mode 100644 index 0000000..bb4d98a --- /dev/null +++ b/Texture2DDecoder/astc.cpp @@ -0,0 +1,1148 @@ +#include "astc.h" +#include +#include +#include +#include +#include "color.h" +#include "fp16.h" + +static const int BitReverseTable[] = { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, + 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 
0x44, 0xC4, 0x24, 0xA4, + 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, + 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, + 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, + 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, + 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, + 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, + 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, + 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, + 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, + 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, + 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, + 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF}; + +static const int WeightPrecTableA[] = {0, 0, 0, 3, 0, 5, 3, 0, 0, 0, 5, 3, 0, 5, 3, 0}; +static const int WeightPrecTableB[] = {0, 0, 1, 0, 2, 0, 1, 3, 0, 0, 1, 2, 4, 2, 3, 5}; + +static const int CemTableA[] = {0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 5, 0, 3, 0, 0}; +static const int CemTableB[] = {8, 6, 5, 7, 5, 4, 6, 4, 3, 5, 3, 2, 4, 2, 1, 3, 1, 2, 1}; + +static inline uint_fast8_t bit_reverse_u8(const uint_fast8_t c, const int bits) { + return BitReverseTable[c] >> (8 - bits); +} + +static inline uint_fast64_t bit_reverse_u64(const uint_fast64_t d, const int bits) { + uint_fast64_t ret = (uint_fast64_t)BitReverseTable[d & 
0xff] << 56 | + (uint_fast64_t)BitReverseTable[d >> 8 & 0xff] << 48 | (uint_fast64_t)BitReverseTable[d >> 16 & 0xff] << 40 | + (uint_fast64_t)BitReverseTable[d >> 24 & 0xff] << 32 | (uint_fast32_t)BitReverseTable[d >> 32 & 0xff] << 24 | + (uint_fast32_t)BitReverseTable[d >> 40 & 0xff] << 16 | (uint_fast16_t)BitReverseTable[d >> 48 & 0xff] << 8 | + BitReverseTable[d >> 56 & 0xff]; + return ret >> (64 - bits); +} + +static inline int getbits(const uint8_t *buf, const int bit, const int len) { + return (*(int *)(buf + bit / 8) >> (bit % 8)) & ((1 << len) - 1); +} + +static inline uint_fast64_t getbits64(const uint8_t *buf, const int bit, const int len) { + uint_fast64_t mask = len == 64 ? 0xffffffffffffffff : (1ull << len) - 1; + if (len < 1) + return 0; + else if (bit >= 64) + return (*(uint_fast64_t *)(buf + 8)) >> (bit - 64) & mask; + else if (bit <= 0) + return (*(uint_fast64_t *)buf) << -bit & mask; + else if (bit + len <= 64) + return (*(uint_fast64_t *)buf) >> bit & mask; + else + return ((*(uint_fast64_t *)buf) >> bit | *(uint_fast64_t *)(buf + 8) << (64 - bit)) & mask; +} + +static inline uint16_t u8ptr_to_u16(const uint8_t *ptr) { + return lton16(*(uint16_t *)ptr); +} + +static inline uint_fast8_t clamp(const int n) { + return n < 0 ? 0 : n > 255 ? 
255 : n; +} + +static inline void bit_transfer_signed(int *a, int *b) { + *b = (*b >> 1) | (*a & 0x80); + *a = (*a >> 1) & 0x3f; + if (*a & 0x20) + *a -= 0x40; +} + +static inline void set_endpoint(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = r1; + endpoint[1] = g1; + endpoint[2] = b1; + endpoint[3] = a1; + endpoint[4] = r2; + endpoint[5] = g2; + endpoint[6] = b2; + endpoint[7] = a2; +} + +static inline void set_endpoint_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = clamp(r1); + endpoint[1] = clamp(g1); + endpoint[2] = clamp(b1); + endpoint[3] = clamp(a1); + endpoint[4] = clamp(r2); + endpoint[5] = clamp(g2); + endpoint[6] = clamp(b2); + endpoint[7] = clamp(a2); +} + +static inline void set_endpoint_blue(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = (r1 + b1) >> 1; + endpoint[1] = (g1 + b1) >> 1; + endpoint[2] = b1; + endpoint[3] = a1; + endpoint[4] = (r2 + b2) >> 1; + endpoint[5] = (g2 + b2) >> 1; + endpoint[6] = b2; + endpoint[7] = a2; +} + +static inline void set_endpoint_blue_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, + int a2) { + endpoint[0] = clamp((r1 + b1) >> 1); + endpoint[1] = clamp((g1 + b1) >> 1); + endpoint[2] = clamp(b1); + endpoint[3] = clamp(a1); + endpoint[4] = clamp((r2 + b2) >> 1); + endpoint[5] = clamp((g2 + b2) >> 1); + endpoint[6] = clamp(b2); + endpoint[7] = clamp(a2); +} + +static inline uint_fast16_t clamp_hdr(const int n) { + return n < 0 ? 0 : n > 0xfff ? 
0xfff : n; +} + +static inline void set_endpoint_hdr(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, int a2) { + endpoint[0] = r1; + endpoint[1] = g1; + endpoint[2] = b1; + endpoint[3] = a1; + endpoint[4] = r2; + endpoint[5] = g2; + endpoint[6] = b2; + endpoint[7] = a2; +} + +static inline void set_endpoint_hdr_clamp(int endpoint[8], int r1, int g1, int b1, int a1, int r2, int g2, int b2, + int a2) { + endpoint[0] = clamp_hdr(r1); + endpoint[1] = clamp_hdr(g1); + endpoint[2] = clamp_hdr(b1); + endpoint[3] = clamp_hdr(a1); + endpoint[4] = clamp_hdr(r2); + endpoint[5] = clamp_hdr(g2); + endpoint[6] = clamp_hdr(b2); + endpoint[7] = clamp_hdr(a2); +} + +typedef uint_fast8_t (*t_select_folor_func_ptr)(int, int, int); + +static uint_fast8_t select_color(int v0, int v1, int weight) { + return ((((v0 << 8 | v0) * (64 - weight) + (v1 << 8 | v1) * weight + 32) >> 6) * 255 + 32768) / 65536; +} + +static uint_fast8_t select_color_hdr(int v0, int v1, int weight) { + uint16_t c = ((v0 << 4) * (64 - weight) + (v1 << 4) * weight + 32) >> 6; + uint16_t m = c & 0x7ff; + if (m < 512) + m *= 3; + else if (m < 1536) + m = 4 * m - 512; + else + m = 5 * m - 2048; + float f = fp16_ieee_to_fp32_value((c >> 1 & 0x7c00) | m >> 3); + return isfinite(f) ? 
clamp(roundf(f * 255)) : 255; +} + +static inline uint8_t f32_to_u8(const float f) { + float c = roundf(f * 255); + if (c < 0) + return 0; + else if (c > 255) + return 255; + else + return c; +} + +static inline uint8_t f16ptr_to_u8(const uint8_t *ptr) { + return f32_to_u8(fp16_ieee_to_fp32_value(lton16(*(uint16_t *)ptr))); +} + +typedef struct { + int bw; + int bh; + int width; + int height; + int part_num; + int dual_plane; + int plane_selector; + int weight_range; + int weight_num; + int cem[4]; + int cem_range; + int endpoint_value_num; + int endpoints[4][8]; + int weights[144][2]; + int partition[144]; +} BlockData; + +typedef struct { + int bits; + int nonbits; +} IntSeqData; + +void decode_intseq(const uint8_t *buf, int offset, const int a, const int b, const int count, const int reverse, + IntSeqData *out) { + static int mt[] = {0, 2, 4, 5, 7}; + static int mq[] = {0, 3, 5}; + static int TritsTable[5][256] = { + {0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 0, 0, + 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, + 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, + 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, + 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, + 1, 2, 2, 0, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, + 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 0, 0, 1, 2, 1, 0, 1, 2, 2, 0, 1, 2, 2}, + {0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, + 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 
2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, + 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, + 2, 1, 0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1}, + {0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, + 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, + 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, + 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, + 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, + 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, + 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}}; + static int QuintsTable[3][128] = { + {0, 1, 2, 3, 4, 0, 4, 4, 0, 1, 2, 3, 4, 1, 4, 4, 0, 1, 2, 3, 4, 2, 4, 4, 0, 1, 2, 3, 4, 3, 4, 4, + 0, 1, 2, 3, 4, 0, 4, 0, 0, 1, 2, 3, 4, 1, 4, 1, 0, 1, 2, 3, 4, 2, 4, 2, 0, 1, 2, 3, 4, 3, 4, 3, + 0, 1, 2, 3, 4, 0, 2, 3, 0, 1, 2, 3, 4, 1, 2, 3, 0, 1, 2, 3, 4, 2, 2, 3, 0, 1, 2, 3, 4, 3, 2, 3, + 0, 1, 2, 3, 4, 0, 0, 1, 0, 1, 2, 3, 4, 1, 0, 1, 0, 1, 2, 3, 4, 2, 0, 1, 0, 1, 2, 3, 4, 3, 0, 1}, + {0, 0, 0, 0, 0, 4, 4, 4, 1, 1, 1, 1, 1, 4, 4, 4, 2, 2, 2, 2, 2, 4, 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 0, 4, 1, 1, 1, 1, 1, 4, 1, 4, 2, 2, 2, 2, 2, 4, 2, 4, 3, 3, 3, 3, 3, 4, 3, 4, + 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 1, 1, 1, 4, 1, 1, 2, 2, 2, 2, 2, 4, 2, 2, 3, 3, 3, 3, 3, 4, 3, 3, + 0, 0, 0, 0, 0, 4, 0, 0, 1, 1, 1, 1, 1, 4, 1, 1, 2, 2, 2, 2, 2, 4, 2, 2, 3, 3, 3, 3, 3, 4, 3, 3}, + {0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 3, 4, + 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, 1, 1, 1, 1, 1, 1, 4, 4, + 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 4, 4, + 3, 3, 3, 3, 3, 3, 4, 
4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4}}; + + if (count <= 0) + return; + + int n = 0; + + if (a == 3) { + int mask = (1 << b) - 1; + int block_count = (count + 4) / 5; + int last_block_count = (count + 4) % 5 + 1; + int block_size = 8 + 5 * b; + int last_block_size = (block_size * last_block_count + 4) / 5; + + if (reverse) { + for (int i = 0, p = offset; i < block_count; i++, p -= block_size) { + int now_size = (i < block_count - 1) ? block_size : last_block_size; + uint_fast64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); + int x = + (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); + for (int j = 0; j < 5 && n < count; j++, n++) + out[n] = { static_cast(d >> (mt[j] + b * j) & mask), TritsTable[j][x]}; + } + } else { + for (int i = 0, p = offset; i < block_count; i++, p += block_size) { + uint_fast64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); + int x = + (d >> b & 3) | (d >> b * 2 & 0xc) | (d >> b * 3 & 0x10) | (d >> b * 4 & 0x60) | (d >> b * 5 & 0x80); + for (int j = 0; j < 5 && n < count; j++, n++) + out[n] = { static_cast(d >> (mt[j] + b * j) & mask), TritsTable[j][x]}; + } + } + } else if (a == 5) { + int mask = (1 << b) - 1; + int block_count = (count + 2) / 3; + int last_block_count = (count + 2) % 3 + 1; + int block_size = 7 + 3 * b; + int last_block_size = (block_size * last_block_count + 2) / 3; + + if (reverse) { + for (int i = 0, p = offset; i < block_count; i++, p -= block_size) { + int now_size = (i < block_count - 1) ? 
block_size : last_block_size; + uint_fast64_t d = bit_reverse_u64(getbits64(buf, p - now_size, now_size), now_size); + int x = (d >> b & 7) | (d >> b * 2 & 0x18) | (d >> b * 3 & 0x60); + for (int j = 0; j < 3 && n < count; j++, n++) + out[n] = { static_cast(d >> (mq[j] + b * j) & mask), QuintsTable[j][x]}; + } + } else { + for (int i = 0, p = offset; i < block_count; i++, p += block_size) { + uint_fast64_t d = getbits64(buf, p, (i < block_count - 1) ? block_size : last_block_size); + int x = (d >> b & 7) | (d >> b * 2 & 0x18) | (d >> b * 3 & 0x60); + for (int j = 0; j < 3 && n < count; j++, n++) + out[n] = { static_cast(d >> (mq[j] + b * j) & mask), QuintsTable[j][x]}; + } + } + } else { + if (reverse) + for (int p = offset - b; n < count; n++, p -= b) + out[n] = {bit_reverse_u8(getbits(buf, p, b), b), 0}; + else + for (int p = offset; n < count; n++, p += b) + out[n] = {getbits(buf, p, b), 0}; + } +} + +void decode_block_params(const uint8_t *buf, BlockData *block_data) { + block_data->dual_plane = !!(buf[1] & 4); + block_data->weight_range = (buf[0] >> 4 & 1) | (buf[1] << 2 & 8); + + if (buf[0] & 3) { + block_data->weight_range |= buf[0] << 1 & 6; + switch (buf[0] & 0xc) { + case 0: + block_data->width = (u8ptr_to_u16(buf) >> 7 & 3) + 4; + block_data->height = (buf[0] >> 5 & 3) + 2; + break; + case 4: + block_data->width = (u8ptr_to_u16(buf) >> 7 & 3) + 8; + block_data->height = (buf[0] >> 5 & 3) + 2; + break; + case 8: + block_data->width = (buf[0] >> 5 & 3) + 2; + block_data->height = (u8ptr_to_u16(buf) >> 7 & 3) + 8; + break; + case 12: + if (buf[1] & 1) { + block_data->width = (buf[0] >> 7 & 1) + 2; + block_data->height = (buf[0] >> 5 & 3) + 2; + } else { + block_data->width = (buf[0] >> 5 & 3) + 2; + block_data->height = (buf[0] >> 7 & 1) + 6; + } + break; + } + } else { + block_data->weight_range |= buf[0] >> 1 & 6; + switch (u8ptr_to_u16(buf) & 0x180) { + case 0: + block_data->width = 12; + block_data->height = (buf[0] >> 5 & 3) + 2; + break; + case 0x80: 
+ block_data->width = (buf[0] >> 5 & 3) + 2; + block_data->height = 12; + break; + case 0x100: + block_data->width = (buf[0] >> 5 & 3) + 6; + block_data->height = (buf[1] >> 1 & 3) + 6; + block_data->dual_plane = 0; + block_data->weight_range &= 7; + break; + case 0x180: + block_data->width = (buf[0] & 0x20) ? 10 : 6; + block_data->height = (buf[0] & 0x20) ? 6 : 10; + break; + } + } + + block_data->part_num = (buf[1] >> 3 & 3) + 1; + + block_data->weight_num = block_data->width * block_data->height; + if (block_data->dual_plane) + block_data->weight_num *= 2; + + int weight_bits, config_bits, cem_base = 0; + + switch (WeightPrecTableA[block_data->weight_range]) { + case 3: + weight_bits = + block_data->weight_num * WeightPrecTableB[block_data->weight_range] + (block_data->weight_num * 8 + 4) / 5; + break; + case 5: + weight_bits = + block_data->weight_num * WeightPrecTableB[block_data->weight_range] + (block_data->weight_num * 7 + 2) / 3; + break; + default: + weight_bits = block_data->weight_num * WeightPrecTableB[block_data->weight_range]; + } + + if (block_data->part_num == 1) { + block_data->cem[0] = u8ptr_to_u16(buf + 1) >> 5 & 0xf; + config_bits = 17; + } else { + cem_base = u8ptr_to_u16(buf + 2) >> 7 & 3; + if (cem_base == 0) { + int cem = buf[3] >> 1 & 0xf; + for (int i = 0; i < block_data->part_num; i++) + block_data->cem[i] = cem; + config_bits = 29; + } else { + for (int i = 0; i < block_data->part_num; i++) + block_data->cem[i] = ((buf[3] >> (i + 1) & 1) + cem_base - 1) << 2; + switch (block_data->part_num) { + case 2: + block_data->cem[0] |= buf[3] >> 3 & 3; + block_data->cem[1] |= getbits(buf, 126 - weight_bits, 2); + break; + case 3: + block_data->cem[0] |= buf[3] >> 4 & 1; + block_data->cem[0] |= getbits(buf, 122 - weight_bits, 2) & 2; + block_data->cem[1] |= getbits(buf, 124 - weight_bits, 2); + block_data->cem[2] |= getbits(buf, 126 - weight_bits, 2); + break; + case 4: + for (int i = 0; i < 4; i++) + block_data->cem[i] |= getbits(buf, 120 + i * 2 
- weight_bits, 2); + break; + } + config_bits = 25 + block_data->part_num * 3; + } + } + + if (block_data->dual_plane) { + config_bits += 2; + block_data->plane_selector = + getbits(buf, cem_base ? 130 - weight_bits - block_data->part_num * 3 : 126 - weight_bits, 2); + } + + int remain_bits = 128 - config_bits - weight_bits; + + block_data->endpoint_value_num = 0; + for (int i = 0; i < block_data->part_num; i++) + block_data->endpoint_value_num += (block_data->cem[i] >> 1 & 6) + 2; + + for (int i = 0, endpoint_bits; i < (int)(sizeof(CemTableA) / sizeof(int)); i++) { + switch (CemTableA[i]) { + case 3: + endpoint_bits = + block_data->endpoint_value_num * CemTableB[i] + (block_data->endpoint_value_num * 8 + 4) / 5; + break; + case 5: + endpoint_bits = + block_data->endpoint_value_num * CemTableB[i] + (block_data->endpoint_value_num * 7 + 2) / 3; + break; + default: + endpoint_bits = block_data->endpoint_value_num * CemTableB[i]; + } + + if (endpoint_bits <= remain_bits) { + block_data->cem_range = i; + break; + } + } +} + +void decode_endpoints_hdr7(int *endpoints, int *v) { + int modeval = (v[2] >> 4 & 0x8) | (v[1] >> 5 & 0x4) | (v[0] >> 6); + int major_component, mode; + if ((modeval & 0xc) != 0xc) { + major_component = modeval >> 2; + mode = modeval & 3; + } else if (modeval != 0xf) { + major_component = modeval & 3; + mode = 4; + } else { + major_component = 0; + mode = 5; + } + int c[] = {v[0] & 0x3f, v[1] & 0x1f, v[2] & 0x1f, v[3] & 0x1f}; + + switch (mode) { + case 0: + c[3] |= v[3] & 0x60; + c[0] |= v[3] >> 1 & 0x40; + c[0] |= v[2] << 1 & 0x80; + c[0] |= v[1] << 3 & 0x300; + c[0] |= v[2] << 5 & 0x400; + c[0] <<= 1; + c[1] <<= 1; + c[2] <<= 1; + c[3] <<= 1; + break; + case 1: + c[1] |= v[1] & 0x20; + c[2] |= v[2] & 0x20; + c[0] |= v[3] >> 1 & 0x40; + c[0] |= v[2] << 1 & 0x80; + c[0] |= v[1] << 2 & 0x100; + c[0] |= v[3] << 4 & 0x600; + c[0] <<= 1; + c[1] <<= 1; + c[2] <<= 1; + c[3] <<= 1; + break; + case 2: + c[3] |= v[3] & 0xe0; + c[0] |= v[2] << 1 & 0xc0; + 
c[0] |= v[1] << 3 & 0x300; + c[0] <<= 2; + c[1] <<= 2; + c[2] <<= 2; + c[3] <<= 2; + break; + case 3: + c[1] |= v[1] & 0x20; + c[2] |= v[2] & 0x20; + c[3] |= v[3] & 0x60; + c[0] |= v[3] >> 1 & 0x40; + c[0] |= v[2] << 1 & 0x80; + c[0] |= v[1] << 2 & 0x100; + c[0] <<= 3; + c[1] <<= 3; + c[2] <<= 3; + c[3] <<= 3; + break; + case 4: + c[1] |= v[1] & 0x60; + c[2] |= v[2] & 0x60; + c[3] |= v[3] & 0x20; + c[0] |= v[3] >> 1 & 0x40; + c[0] |= v[3] << 1 & 0x80; + c[0] <<= 4; + c[1] <<= 4; + c[2] <<= 4; + c[3] <<= 4; + break; + case 5: + c[1] |= v[1] & 0x60; + c[2] |= v[2] & 0x60; + c[3] |= v[3] & 0x60; + c[0] |= v[3] >> 1 & 0x40; + c[0] <<= 5; + c[1] <<= 5; + c[2] <<= 5; + c[3] <<= 5; + break; + } + if (mode != 5) { + c[1] = c[0] - c[1]; + c[2] = c[0] - c[2]; + } + if (major_component == 1) + set_endpoint_hdr_clamp(endpoints, c[1] - c[3], c[0] - c[3], c[2] - c[3], 0x780, c[1], c[0], c[2], 0x780); + else if (major_component == 2) + set_endpoint_hdr_clamp(endpoints, c[2] - c[3], c[1] - c[3], c[0] - c[3], 0x780, c[2], c[1], c[0], 0x780); + else + set_endpoint_hdr_clamp(endpoints, c[0] - c[3], c[1] - c[3], c[2] - c[3], 0x780, c[0], c[1], c[2], 0x780); +} + +void decode_endpoints_hdr11(int *endpoints, int *v, int alpha1, int alpha2) { + int major_component = (v[4] >> 7) | (v[5] >> 6 & 2); + if (major_component == 3) { + set_endpoint_hdr(endpoints, v[0] << 4, v[2] << 4, v[4] << 5 & 0xfe0, alpha1, v[1] << 4, v[3] << 4, + v[5] << 5 & 0xfe0, alpha2); + return; + } + int mode = (v[1] >> 7) | (v[2] >> 6 & 2) | (v[3] >> 5 & 4); + int va = v[0] | (v[1] << 2 & 0x100); + int vb0 = v[2] & 0x3f, vb1 = v[3] & 0x3f; + int vc = v[1] & 0x3f; + int16_t vd0, vd1; + + switch (mode) { + case 0: + case 2: + vd0 = v[4] & 0x7f; + if (vd0 & 0x40) + vd0 |= 0xff80; + vd1 = v[5] & 0x7f; + if (vd1 & 0x40) + vd1 |= 0xff80; + break; + case 1: + case 3: + case 5: + case 7: + vd0 = v[4] & 0x3f; + if (vd0 & 0x20) + vd0 |= 0xffc0; + vd1 = v[5] & 0x3f; + if (vd1 & 0x20) + vd1 |= 0xffc0; + break; + default: + vd0 = 
v[4] & 0x1f; + if (vd0 & 0x10) + vd0 |= 0xffe0; + vd1 = v[5] & 0x1f; + if (vd1 & 0x10) + vd1 |= 0xffe0; + break; + } + + switch (mode) { + case 0: + vb0 |= v[2] & 0x40; + vb1 |= v[3] & 0x40; + break; + case 1: + vb0 |= v[2] & 0x40; + vb1 |= v[3] & 0x40; + vb0 |= v[4] << 1 & 0x80; + vb1 |= v[5] << 1 & 0x80; + break; + case 2: + va |= v[2] << 3 & 0x200; + vc |= v[3] & 0x40; + break; + case 3: + va |= v[4] << 3 & 0x200; + vc |= v[5] & 0x40; + vb0 |= v[2] & 0x40; + vb1 |= v[3] & 0x40; + break; + case 4: + va |= v[4] << 4 & 0x200; + va |= v[5] << 5 & 0x400; + vb0 |= v[2] & 0x40; + vb1 |= v[3] & 0x40; + vb0 |= v[4] << 1 & 0x80; + vb1 |= v[5] << 1 & 0x80; + break; + case 5: + va |= v[2] << 3 & 0x200; + va |= v[3] << 4 & 0x400; + vc |= v[5] & 0x40; + vc |= v[4] << 1 & 0x80; + break; + case 6: + va |= v[4] << 4 & 0x200; + va |= v[5] << 5 & 0x400; + va |= v[4] << 5 & 0x800; + vc |= v[5] & 0x40; + vb0 |= v[2] & 0x40; + vb1 |= v[3] & 0x40; + break; + case 7: + va |= v[2] << 3 & 0x200; + va |= v[3] << 4 & 0x400; + va |= v[4] << 5 & 0x800; + vc |= v[5] & 0x40; + break; + } + + int shamt = (mode >> 1) ^ 3; + va <<= shamt; + vb0 <<= shamt; + vb1 <<= shamt; + vc <<= shamt; + int mult = 1 << shamt; + vd0 *= mult; + vd1 *= mult; + + if (major_component == 1) + set_endpoint_hdr_clamp(endpoints, va - vb0 - vc - vd0, va - vc, va - vb1 - vc - vd1, alpha1, va - vb0, va, + va - vb1, alpha2); + else if (major_component == 2) + set_endpoint_hdr_clamp(endpoints, va - vb1 - vc - vd1, va - vb0 - vc - vd0, va - vc, alpha1, va - vb1, va - vb0, + va, alpha2); + else + set_endpoint_hdr_clamp(endpoints, va - vc, va - vb0 - vc - vd0, va - vb1 - vc - vd1, alpha1, va, va - vb0, + va - vb1, alpha2); +} + +void decode_endpoints(const uint8_t *buf, BlockData *data) { + static const int TritsTable[] = {0, 204, 93, 44, 22, 11, 5}; + static const int QuintsTable[] = {0, 113, 54, 26, 13, 6}; + IntSeqData seq[32]; + int ev[32]; + decode_intseq(buf, data->part_num == 1 ? 
17 : 29, CemTableA[data->cem_range], CemTableB[data->cem_range], + data->endpoint_value_num, 0, seq); + + switch (CemTableA[data->cem_range]) { + case 3: + for (int i = 0, b, c = TritsTable[CemTableB[data->cem_range]]; i < data->endpoint_value_num; i++) { + int a = (seq[i].bits & 1) * 0x1ff; + int x = seq[i].bits >> 1; + switch (CemTableB[data->cem_range]) { + case 1: + b = 0; + break; + case 2: + b = 0b100010110 * x; + break; + case 3: + b = x << 7 | x << 2 | x; + break; + case 4: + b = x << 6 | x; + break; + case 5: + b = x << 5 | x >> 2; + break; + case 6: + b = x << 4 | x >> 4; + break; + } + ev[i] = (a & 0x80) | ((seq[i].nonbits * c + b) ^ a) >> 2; + } + break; + case 5: + for (int i = 0, b, c = QuintsTable[CemTableB[data->cem_range]]; i < data->endpoint_value_num; i++) { + int a = (seq[i].bits & 1) * 0x1ff; + int x = seq[i].bits >> 1; + switch (CemTableB[data->cem_range]) { + case 1: + b = 0; + break; + case 2: + b = 0b100001100 * x; + break; + case 3: + b = x << 7 | x << 1 | x >> 1; + break; + case 4: + b = x << 6 | x >> 1; + break; + case 5: + b = x << 5 | x >> 3; + break; + } + ev[i] = (a & 0x80) | ((seq[i].nonbits * c + b) ^ a) >> 2; + } + break; + default: + switch (CemTableB[data->cem_range]) { + case 1: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits * 0xff; + break; + case 2: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits * 0x55; + break; + case 3: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits << 5 | seq[i].bits << 2 | seq[i].bits >> 1; + break; + case 4: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits << 4 | seq[i].bits; + break; + case 5: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits << 3 | seq[i].bits >> 2; + break; + case 6: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits << 2 | seq[i].bits >> 4; + break; + case 7: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = 
seq[i].bits << 1 | seq[i].bits >> 6; + break; + case 8: + for (int i = 0; i < data->endpoint_value_num; i++) + ev[i] = seq[i].bits; + break; + } + } + + int *v = ev; + for (int cem = 0; cem < data->part_num; v += (data->cem[cem] / 4 + 1) * 2, cem++) { + switch (data->cem[cem]) { + case 0: + set_endpoint(data->endpoints[cem], v[0], v[0], v[0], 255, v[1], v[1], v[1], 255); + break; + case 1: { + int l0 = (v[0] >> 2) | (v[1] & 0xc0); + int l1 = clamp(l0 + (v[1] & 0x3f)); + set_endpoint(data->endpoints[cem], l0, l0, l0, 255, l1, l1, l1, 255); + } break; + case 2: { + int y0, y1; + if (v[0] <= v[1]) { + y0 = v[0] << 4; + y1 = v[1] << 4; + } else { + y0 = (v[1] << 4) + 8; + y1 = (v[0] << 4) - 8; + } + set_endpoint_hdr(data->endpoints[cem], y0, y0, y0, 0x780, y1, y1, y1, 0x780); + } break; + case 3: { + int y0, d; + if (v[0] & 0x80) { + y0 = (v[1] & 0xe0) << 4 | (v[0] & 0x7f) << 2; + d = (v[1] & 0x1f) << 2; + } else { + y0 = (v[1] & 0xf0) << 4 | (v[0] & 0x7f) << 1; + d = (v[1] & 0x0f) << 1; + } + int y1 = clamp_hdr(y0 + d); + set_endpoint_hdr(data->endpoints[cem], y0, y0, y0, 0x780, y1, y1, y1, 0x780); + } break; + case 4: + set_endpoint(data->endpoints[cem], v[0], v[0], v[0], v[2], v[1], v[1], v[1], v[3]); + break; + case 5: + bit_transfer_signed(&v[1], &v[0]); + bit_transfer_signed(&v[3], &v[2]); + v[1] += v[0]; + set_endpoint_clamp(data->endpoints[cem], v[0], v[0], v[0], v[2], v[1], v[1], v[1], v[2] + v[3]); + break; + case 6: + set_endpoint(data->endpoints[cem], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8, 255, v[0], v[1], + v[2], 255); + break; + case 7: + decode_endpoints_hdr7(data->endpoints[cem], v); + break; + case 8: + if (v[0] + v[2] + v[4] <= v[1] + v[3] + v[5]) + set_endpoint(data->endpoints[cem], v[0], v[2], v[4], 255, v[1], v[3], v[5], 255); + else + set_endpoint_blue(data->endpoints[cem], v[1], v[3], v[5], 255, v[0], v[2], v[4], 255); + break; + case 9: + bit_transfer_signed(&v[1], &v[0]); + bit_transfer_signed(&v[3], &v[2]); + 
bit_transfer_signed(&v[5], &v[4]); + if (v[1] + v[3] + v[5] >= 0) + set_endpoint_clamp(data->endpoints[cem], v[0], v[2], v[4], 255, v[0] + v[1], v[2] + v[3], v[4] + v[5], + 255); + else + set_endpoint_blue_clamp(data->endpoints[cem], v[0] + v[1], v[2] + v[3], v[4] + v[5], 255, v[0], v[2], + v[4], 255); + break; + case 10: + set_endpoint(data->endpoints[cem], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8, v[4], v[0], v[1], + v[2], v[5]); + break; + case 11: + decode_endpoints_hdr11(data->endpoints[cem], v, 0x780, 0x780); + break; + case 12: + if (v[0] + v[2] + v[4] <= v[1] + v[3] + v[5]) + set_endpoint(data->endpoints[cem], v[0], v[2], v[4], v[6], v[1], v[3], v[5], v[7]); + else + set_endpoint_blue(data->endpoints[cem], v[1], v[3], v[5], v[7], v[0], v[2], v[4], v[6]); + break; + case 13: + bit_transfer_signed(&v[1], &v[0]); + bit_transfer_signed(&v[3], &v[2]); + bit_transfer_signed(&v[5], &v[4]); + bit_transfer_signed(&v[7], &v[6]); + if (v[1] + v[3] + v[5] >= 0) + set_endpoint_clamp(data->endpoints[cem], v[0], v[2], v[4], v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5], + v[6] + v[7]); + else + set_endpoint_blue_clamp(data->endpoints[cem], v[0] + v[1], v[2] + v[3], v[4] + v[5], v[6] + v[7], v[0], + v[2], v[4], v[6]); + break; + case 14: + decode_endpoints_hdr11(data->endpoints[cem], v, v[6], v[7]); + break; + case 15: { + int mode = ((v[6] >> 7) & 1) | ((v[7] >> 6) & 2); + v[6] &= 0x7f; + v[7] &= 0x7f; + if (mode == 3) { + decode_endpoints_hdr11(data->endpoints[cem], v, v[6] << 5, v[7] << 5); + } else { + v[6] |= (v[7] << (mode + 1)) & 0x780; + v[7] = ((v[7] & (0x3f >> mode)) ^ (0x20 >> mode)) - (0x20 >> mode); + v[6] <<= 4 - mode; + v[7] <<= 4 - mode; + decode_endpoints_hdr11(data->endpoints[cem], v, v[6], clamp_hdr(v[6] + v[7])); + } + } break; + //default: + // rb_raise(rb_eStandardError, "Unsupported ASTC format"); + } + } +} + +void decode_weights(const uint8_t *buf, BlockData *data) { + IntSeqData seq[128]; + int wv[128] = {}; + decode_intseq(buf, 128, 
WeightPrecTableA[data->weight_range], WeightPrecTableB[data->weight_range], + data->weight_num, 1, seq); + + if (WeightPrecTableA[data->weight_range] == 0) { + switch (WeightPrecTableB[data->weight_range]) { + case 1: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].bits ? 63 : 0; + break; + case 2: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].bits << 4 | seq[i].bits << 2 | seq[i].bits; + break; + case 3: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].bits << 3 | seq[i].bits; + break; + case 4: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].bits << 2 | seq[i].bits >> 2; + break; + case 5: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].bits << 1 | seq[i].bits >> 4; + break; + } + for (int i = 0; i < data->weight_num; i++) + if (wv[i] > 32) + ++wv[i]; + } else if (WeightPrecTableB[data->weight_range] == 0) { + int s = WeightPrecTableA[data->weight_range] == 3 ? 32 : 16; + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].nonbits * s; + } else { + if (WeightPrecTableA[data->weight_range] == 3) { + switch (WeightPrecTableB[data->weight_range]) { + case 1: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].nonbits * 50; + break; + case 2: + for (int i = 0; i < data->weight_num; i++) { + wv[i] = seq[i].nonbits * 23; + if (seq[i].bits & 2) + wv[i] += 0b1000101; + } + break; + case 3: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].nonbits * 11 + ((seq[i].bits << 4 | seq[i].bits >> 1) & 0b1100011); + break; + } + } else if (WeightPrecTableA[data->weight_range] == 5) { + switch (WeightPrecTableB[data->weight_range]) { + case 1: + for (int i = 0; i < data->weight_num; i++) + wv[i] = seq[i].nonbits * 28; + break; + case 2: + for (int i = 0; i < data->weight_num; i++) { + wv[i] = seq[i].nonbits * 13; + if (seq[i].bits & 2) + wv[i] += 0b1000010; + } + break; + } + } + for (int i = 0; i < data->weight_num; i++) { + int a = (seq[i].bits & 1) * 0x7f; + wv[i] = (a & 0x20) | 
((wv[i] ^ a) >> 2); + if (wv[i] > 32) + ++wv[i]; + } + } + + int ds = (1024 + data->bw / 2) / (data->bw - 1); + int dt = (1024 + data->bh / 2) / (data->bh - 1); + int pn = data->dual_plane ? 2 : 1; + + for (int t = 0, i = 0; t < data->bh; t++) { + for (int s = 0; s < data->bw; s++, i++) { + int gs = (ds * s * (data->width - 1) + 32) >> 6; + int gt = (dt * t * (data->height - 1) + 32) >> 6; + int fs = gs & 0xf; + int ft = gt & 0xf; + int v = (gs >> 4) + (gt >> 4) * data->width; + int w11 = (fs * ft + 8) >> 4; + int w10 = ft - w11; + int w01 = fs - w11; + int w00 = 16 - fs - ft + w11; + + for (int p = 0; p < pn; p++) { + int p00 = wv[v * pn + p]; + int p01 = wv[(v + 1) * pn + p]; + int p10 = wv[(v + data->width) * pn + p]; + int p11 = wv[(v + data->width + 1) * pn + p]; + data->weights[i][p] = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4; + } + } + } +} + +void select_partition(const uint8_t *buf, BlockData *data) { + int small_block = data->bw * data->bh < 31; + int seed = (*(int *)buf >> 13 & 0x3ff) | (data->part_num - 1) << 10; + + uint32_t rnum = seed; + rnum ^= rnum >> 15; + rnum -= rnum << 17; + rnum += rnum << 7; + rnum += rnum << 4; + rnum ^= rnum >> 5; + rnum += rnum << 16; + rnum ^= rnum >> 7; + rnum ^= rnum >> 3; + rnum ^= rnum << 6; + rnum ^= rnum >> 17; + + int seeds[8]; + for (int i = 0; i < 8; i++) { + seeds[i] = (rnum >> (i * 4)) & 0xF; + seeds[i] *= seeds[i]; + } + + int sh[2] = {seed & 2 ? 4 : 5, data->part_num == 3 ? 6 : 5}; + + if (seed & 1) + for (int i = 0; i < 8; i++) + seeds[i] >>= sh[i % 2]; + else + for (int i = 0; i < 8; i++) + seeds[i] >>= sh[1 - i % 2]; + + if (small_block) { + for (int t = 0, i = 0; t < data->bh; t++) { + for (int s = 0; s < data->bw; s++, i++) { + int x = s << 1; + int y = t << 1; + int a = (seeds[0] * x + seeds[1] * y + (rnum >> 14)) & 0x3f; + int b = (seeds[2] * x + seeds[3] * y + (rnum >> 10)) & 0x3f; + int c = data->part_num < 3 ? 
0 : (seeds[4] * x + seeds[5] * y + (rnum >> 6)) & 0x3f; + int d = data->part_num < 4 ? 0 : (seeds[6] * x + seeds[7] * y + (rnum >> 2)) & 0x3f; + data->partition[i] = (a >= b && a >= c && a >= d) ? 0 : (b >= c && b >= d) ? 1 : (c >= d) ? 2 : 3; + } + } + } else { + for (int y = 0, i = 0; y < data->bh; y++) { + for (int x = 0; x < data->bw; x++, i++) { + int a = (seeds[0] * x + seeds[1] * y + (rnum >> 14)) & 0x3f; + int b = (seeds[2] * x + seeds[3] * y + (rnum >> 10)) & 0x3f; + int c = data->part_num < 3 ? 0 : (seeds[4] * x + seeds[5] * y + (rnum >> 6)) & 0x3f; + int d = data->part_num < 4 ? 0 : (seeds[6] * x + seeds[7] * y + (rnum >> 2)) & 0x3f; + data->partition[i] = (a >= b && a >= c && a >= d) ? 0 : (b >= c && b >= d) ? 1 : (c >= d) ? 2 : 3; + } + } + } +} + +void applicate_color(const BlockData *data, uint32_t *outbuf) { + static const t_select_folor_func_ptr FuncTableC[] = { + select_color, select_color, select_color_hdr, select_color_hdr, select_color, select_color, + select_color, select_color_hdr, select_color, select_color, select_color, select_color_hdr, + select_color, select_color, select_color_hdr, select_color_hdr}; + static const t_select_folor_func_ptr FuncTableA[] = { + select_color, select_color, select_color_hdr, select_color_hdr, select_color, select_color, + select_color, select_color_hdr, select_color, select_color, select_color, select_color_hdr, + select_color, select_color, select_color, select_color_hdr}; + if (data->dual_plane) { + int ps[] = {0, 0, 0, 0}; + ps[data->plane_selector] = 1; + if (data->part_num > 1) { + for (int i = 0; i < data->bw * data->bh; i++) { + int p = data->partition[i]; + uint_fast8_t r = + FuncTableC[data->cem[p]](data->endpoints[p][0], data->endpoints[p][4], data->weights[i][ps[0]]); + uint_fast8_t g = + FuncTableC[data->cem[p]](data->endpoints[p][1], data->endpoints[p][5], data->weights[i][ps[1]]); + uint_fast8_t b = + FuncTableC[data->cem[p]](data->endpoints[p][2], data->endpoints[p][6], 
data->weights[i][ps[2]]); + uint_fast8_t a = + FuncTableA[data->cem[p]](data->endpoints[p][3], data->endpoints[p][7], data->weights[i][ps[3]]); + outbuf[i] = color(r, g, b, a); + } + } else { + for (int i = 0; i < data->bw * data->bh; i++) { + uint_fast8_t r = + FuncTableC[data->cem[0]](data->endpoints[0][0], data->endpoints[0][4], data->weights[i][ps[0]]); + uint_fast8_t g = + FuncTableC[data->cem[0]](data->endpoints[0][1], data->endpoints[0][5], data->weights[i][ps[1]]); + uint_fast8_t b = + FuncTableC[data->cem[0]](data->endpoints[0][2], data->endpoints[0][6], data->weights[i][ps[2]]); + uint_fast8_t a = + FuncTableA[data->cem[0]](data->endpoints[0][3], data->endpoints[0][7], data->weights[i][ps[3]]); + outbuf[i] = color(r, g, b, a); + } + } + } else if (data->part_num > 1) { + for (int i = 0; i < data->bw * data->bh; i++) { + int p = data->partition[i]; + uint_fast8_t r = + FuncTableC[data->cem[p]](data->endpoints[p][0], data->endpoints[p][4], data->weights[i][0]); + uint_fast8_t g = + FuncTableC[data->cem[p]](data->endpoints[p][1], data->endpoints[p][5], data->weights[i][0]); + uint_fast8_t b = + FuncTableC[data->cem[p]](data->endpoints[p][2], data->endpoints[p][6], data->weights[i][0]); + uint_fast8_t a = + FuncTableA[data->cem[p]](data->endpoints[p][3], data->endpoints[p][7], data->weights[i][0]); + outbuf[i] = color(r, g, b, a); + } + } else { + for (int i = 0; i < data->bw * data->bh; i++) { + uint_fast8_t r = + FuncTableC[data->cem[0]](data->endpoints[0][0], data->endpoints[0][4], data->weights[i][0]); + uint_fast8_t g = + FuncTableC[data->cem[0]](data->endpoints[0][1], data->endpoints[0][5], data->weights[i][0]); + uint_fast8_t b = + FuncTableC[data->cem[0]](data->endpoints[0][2], data->endpoints[0][6], data->weights[i][0]); + uint_fast8_t a = + FuncTableA[data->cem[0]](data->endpoints[0][3], data->endpoints[0][7], data->weights[i][0]); + outbuf[i] = color(r, g, b, a); + } + } +} + +void decode_block(const uint8_t *buf, const int bw, const int bh, 
uint32_t *outbuf) { + if (buf[0] == 0xfc && (buf[1] & 1) == 1) { + uint_fast32_t c; + if (buf[1] & 2) + c = color(f16ptr_to_u8(buf + 8), f16ptr_to_u8(buf + 10), f16ptr_to_u8(buf + 12), f16ptr_to_u8(buf + 14)); + else + c = color(buf[9], buf[11], buf[13], buf[15]); + for (int i = 0; i < bw * bh; i++) + outbuf[i] = c; + } else if (((buf[0] & 0xc3) == 0xc0 && (buf[1] & 1) == 1) || (buf[0] & 0xf) == 0) { + uint_fast32_t c = color(255, 0, 255, 255); + for (int i = 0; i < bw * bh; i++) + outbuf[i] = c; + } else { + BlockData block_data; + block_data.bw = bw; + block_data.bh = bh; + decode_block_params(buf, &block_data); + decode_endpoints(buf, &block_data); + decode_weights(buf, &block_data); + if (block_data.part_num > 1) + select_partition(buf, &block_data); + applicate_color(&block_data, outbuf); + } +} + +int decode_astc(const uint8_t *data, const long w, const long h, const int bw, const int bh, uint32_t *image) { + const long num_blocks_x = (w + bw - 1) / bw; + const long num_blocks_y = (h + bh - 1) / bh; + uint32_t buffer[144]; + const uint8_t *d = data; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, d += 16) { + decode_block(d, bw, bh, buffer); + copy_block_buffer(bx, by, w, h, bw, bh, buffer, image); + } + } + return 1; +} diff --git a/Texture2DDecoder/astc.h b/Texture2DDecoder/astc.h new file mode 100644 index 0000000..3fa2cfc --- /dev/null +++ b/Texture2DDecoder/astc.h @@ -0,0 +1,8 @@ +#ifndef ASTC_H +#define ASTC_H + +#include + +int decode_astc(const uint8_t *, const long, const long, const int, const int, uint32_t *); + +#endif /* end of include guard: ASTC_H */ diff --git a/Texture2DDecoder/atc.cpp b/Texture2DDecoder/atc.cpp new file mode 100644 index 0000000..9f3e209 --- /dev/null +++ b/Texture2DDecoder/atc.cpp @@ -0,0 +1,91 @@ +#include "bcn.h" +#include "atc.h" +#include "color.h" +#include + +static uint8_t expand_quantized(uint8_t v, int bits) { + v = v << (8 - bits); + return v | (v >> bits); +} + +void 
decode_atc_block(const uint8_t* _src, uint32_t* _dst) +{ + uint8_t colors[4 * 4]; + + uint32_t c0 = _src[0] | (_src[1] << 8); + uint32_t c1 = _src[2] | (_src[3] << 8); + + if (0 == (c0 & 0x8000)) + { + colors[0] = expand_quantized((c0 >> 0) & 0x1f, 5); + colors[1] = expand_quantized((c0 >> 5) & 0x1f, 5); + colors[2] = expand_quantized((c0 >> 10) & 0x1f, 5); + + colors[12] = expand_quantized((c1 >> 0) & 0x1f, 5); + colors[13] = expand_quantized((c1 >> 5) & 0x3f, 6); + colors[14] = expand_quantized((c1 >> 11) & 0x1f, 5); + + colors[4] = (5 * colors[0] + 3 * colors[12]) / 8; + colors[5] = (5 * colors[1] + 3 * colors[13]) / 8; + colors[6] = (5 * colors[2] + 3 * colors[14]) / 8; + + colors[8] = (3 * colors[0] + 5 * colors[12]) / 8; + colors[9] = (3 * colors[1] + 5 * colors[13]) / 8; + colors[10] = (3 * colors[2] + 5 * colors[14]) / 8; + } + else + { + colors[0] = 0; + colors[1] = 0; + colors[2] = 0; + + colors[8] = expand_quantized((c0 >> 0) & 0x1f, 5); + colors[9] = expand_quantized((c0 >> 5) & 0x1f, 5); + colors[10] = expand_quantized((c0 >> 10) & 0x1f, 5); + + colors[12] = expand_quantized((c1 >> 0) & 0x1f, 5); + colors[13] = expand_quantized((c1 >> 5) & 0x3f, 6); + colors[14] = expand_quantized((c1 >> 11) & 0x1f, 5); + + colors[4] = std::max(0, colors[8] - colors[12] / 4); + colors[5] = std::max(0, colors[9] - colors[13] / 4); + colors[6] = std::max(0, colors[10] - colors[14] / 4); + } + + for (uint32_t i = 0, next = 8 * 4; i < 16; i += 1, next += 2) + { + int32_t idx = ((_src[next >> 3] >> (next & 7)) & 3) * 4; + _dst[i] = color(colors[idx + 2], colors[idx + 1], colors[idx + 0], 255); + } +} + +int decode_atc_rgb4(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image) { + uint32_t m_block_width = 4; + uint32_t m_block_height = 4; + uint32_t m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + uint32_t m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + uint32_t buffer[16]; + for (uint32_t by = 0; by < m_blocks_y; by++) { + 
for (uint32_t bx = 0; bx < m_blocks_x; bx++, data += 8) { + decode_atc_block(data, buffer); + copy_block_buffer(bx, by, m_width, m_height, m_block_width, m_block_height, buffer, image); + } + } + return 1; +} + +int decode_atc_rgba8(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image) { + uint32_t m_block_width = 4; + uint32_t m_block_height = 4; + uint32_t m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + uint32_t m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + uint32_t buffer[16]; + for (uint32_t by = 0; by < m_blocks_y; by++) { + for (uint32_t bx = 0; bx < m_blocks_x; bx++, data += 16) { + decode_atc_block(data + 8, buffer); + decode_bc3_alpha(data, buffer, 3); + copy_block_buffer(bx, by, m_width, m_height, m_block_width, m_block_height, buffer, image); + } + } + return 1; +} \ No newline at end of file diff --git a/Texture2DDecoder/atc.h b/Texture2DDecoder/atc.h new file mode 100644 index 0000000..4b2c1bb --- /dev/null +++ b/Texture2DDecoder/atc.h @@ -0,0 +1,5 @@ +#pragma once +#include + +int decode_atc_rgb4(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image); +int decode_atc_rgba8(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image); \ No newline at end of file diff --git a/Texture2DDecoder/bcn.cpp b/Texture2DDecoder/bcn.cpp new file mode 100644 index 0000000..1dc0d88 --- /dev/null +++ b/Texture2DDecoder/bcn.cpp @@ -0,0 +1,1135 @@ +#include "bcn.h" +#include +#include +#include +#include "color.h" +#include "fp16.h" + +static inline void decode_bc1_block(const uint8_t* data, uint32_t* outbuf) { + uint8_t r0, g0, b0, r1, g1, b1; + int q0 = *(uint16_t*)(data); + int q1 = *(uint16_t*)(data + 2); + rgb565_le(q0, &r0, &g0, &b0); + rgb565_le(q1, &r1, &g1, &b1); + uint_fast32_t c[4] = { color(r0, g0, b0, 255), color(r1, g1, b1, 255) }; + if (q0 > q1) { + c[2] = color((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3] = color((r0 + r1 * 2) / 3, (g0 + g1 
* 2) / 3, (b0 + b1 * 2) / 3, 255); + } + else { + c[2] = color((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + c[3] = color(0, 0, 0, 255); + } + uint_fast32_t d = lton32(*(uint32_t*)(data + 4)); + for (int i = 0; i < 16; i++, d >>= 2) + outbuf[i] = c[d & 3]; +} + +int decode_bc1(const uint8_t* data, const long w, const long h, uint32_t* image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + const uint8_t* d = data; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, d += 8) { + decode_bc1_block(d, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +void decode_bc3_alpha(const uint8_t* data, uint32_t* outbuf, int channel) { + uint_fast8_t a[8] = { data[0], data[1] }; + if (a[0] > a[1]) { + a[2] = (a[0] * 6 + a[1]) / 7; + a[3] = (a[0] * 5 + a[1] * 2) / 7; + a[4] = (a[0] * 4 + a[1] * 3) / 7; + a[5] = (a[0] * 3 + a[1] * 4) / 7; + a[6] = (a[0] * 2 + a[1] * 5) / 7; + a[7] = (a[0] + a[1] * 6) / 7; + } + else { + a[2] = (a[0] * 4 + a[1]) / 5; + a[3] = (a[0] * 3 + a[1] * 2) / 5; + a[4] = (a[0] * 2 + a[1] * 3) / 5; + a[5] = (a[0] + a[1] * 4) / 5; + a[6] = 0; + a[7] = 255; + } + + uint8_t* dst = (uint8_t*)outbuf; + uint_fast64_t d = lton64(*(uint64_t*)data) >> 16; + for (int i = 0; i < 16; i++, d >>= 3) + dst[i * 4 + channel] = a[d & 7]; +} + +static inline void decode_bc3_block(const uint8_t* data, uint32_t* outbuf) { + decode_bc1_block(data + 8, outbuf); + decode_bc3_alpha(data, outbuf, 3); +} + +int decode_bc3(const uint8_t* data, const long w, const long h, uint32_t* image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + const uint8_t* d = data; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, d += 16) { + decode_bc3_block(d, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +static inline void 
decode_bc4_block(const uint8_t* data, uint32_t* outbuf) { + decode_bc3_alpha(data, outbuf, 2); +} + +int decode_bc4(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image) { + uint32_t m_block_width = 4; + uint32_t m_block_height = 4; + uint32_t m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + uint32_t m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + uint32_t buffer[16]; + for (uint32_t i = 0; i < 16; i++) + buffer[i] = 0xff000000; + for (uint32_t by = 0; by < m_blocks_y; by++) { + for (uint32_t bx = 0; bx < m_blocks_x; bx++, data += 8) { + decode_bc4_block(data, buffer); + copy_block_buffer(bx, by, m_width, m_height, m_block_width, m_block_height, buffer, image); + } + } + return 1; +} + +static inline void decode_bc5_block(const uint8_t* data, uint32_t* outbuf) { + decode_bc3_alpha(data, outbuf, 2); + decode_bc3_alpha(data + 8, outbuf, 1); +} + +int decode_bc5(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image) { + uint32_t m_block_width = 4; + uint32_t m_block_height = 4; + uint32_t m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + uint32_t m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + uint32_t buffer[16]; + for (uint32_t i = 0; i < 16; i++) + buffer[i] = 0xff000000; + for (uint32_t by = 0; by < m_blocks_y; by++) { + for (uint32_t bx = 0; bx < m_blocks_x; bx++, data += 16) { + decode_bc5_block(data, buffer); + copy_block_buffer(bx, by, m_width, m_height, m_block_width, m_block_height, buffer, image); + } + } + return 1; +} + +struct BitReader +{ + BitReader(const uint8_t* _data, uint16_t _bitPos = 0) + : m_data(_data) + , m_bitPos(_bitPos) + { + } + + uint16_t read(uint8_t _numBits) + { + const uint16_t pos = m_bitPos / 8; + const uint16_t shift = m_bitPos & 7; + uint32_t data = 0; + memcpy(&data, &m_data[pos], std::min(4, 16 - pos)); + m_bitPos += _numBits; + return uint16_t((data >> shift) & ((1 << _numBits) - 1)); + } + + uint16_t peek(uint16_t _offset, 
uint8_t _numBits) + { + const uint16_t bitPos = m_bitPos + _offset; + const uint16_t shift = bitPos & 7; + uint16_t pos = bitPos / 8; + uint32_t data = 0; + memcpy(&data, &m_data[pos], std::min(4, 16 - pos)); + return uint8_t((data >> shift) & ((1 << _numBits) - 1)); + } + + const uint8_t* m_data; + uint16_t m_bitPos; +}; + +static const uint16_t s_bptcP2[] = +{ // 3210 0000000000 1111111111 2222222222 3333333333 + 0xcccc, // 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 + 0x8888, // 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 + 0xeeee, // 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 + 0xecc8, // 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 + 0xc880, // 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 + 0xfeec, // 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 + 0xfec8, // 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 + 0xec80, // 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 + 0xc800, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 + 0xffec, // 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 0xfe80, // 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 + 0xe800, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 + 0xffe8, // 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 0xff00, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 + 0xfff0, // 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 + 0xf000, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 + 0xf710, // 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 + 0x008e, // 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 + 0x7100, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 + 0x08ce, // 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 + 0x008c, // 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 + 0x7310, // 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 + 0x3100, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 + 0x8cce, // 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 + 0x088c, // 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 + 0x3110, // 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 + 0x6666, // 0, 1, 1, 
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 + 0x366c, // 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 + 0x17e8, // 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 + 0x0ff0, // 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 + 0x718e, // 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 + 0x399c, // 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 + 0xaaaa, // 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 + 0xf0f0, // 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 + 0x5a5a, // 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 + 0x33cc, // 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 + 0x3c3c, // 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 + 0x55aa, // 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 + 0x9696, // 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 + 0xa55a, // 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 + 0x73ce, // 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 + 0x13c8, // 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 + 0x324c, // 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 + 0x3bdc, // 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 + 0x6996, // 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 + 0xc33c, // 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 + 0x9966, // 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 + 0x0660, // 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 + 0x0272, // 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 + 0x04e4, // 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 + 0x4e40, // 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 + 0x2720, // 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 + 0xc936, // 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 + 0x936c, // 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 + 0x39c6, // 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 + 0x639c, // 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 + 0x9336, // 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 + 0x9cc6, // 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 + 0x817e, // 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 + 0xe718, // 0, 0, 0, 1, 1, 0, 0, 0, 1, 
1, 1, 0, 0, 1, 1, 1 + 0xccf0, // 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 + 0x0fcc, // 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 + 0x7744, // 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 + 0xee22, // 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 +}; + +static const uint8_t s_bptcA2[] = +{ + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 2, 8, 2, 2, 8, 8, 15, + 2, 8, 2, 2, 8, 8, 2, 2, + 15, 15, 6, 8, 2, 8, 15, 15, + 2, 8, 2, 2, 2, 15, 15, 6, + 6, 2, 6, 8, 15, 15, 2, 2, + 15, 15, 15, 15, 15, 2, 2, 15, +}; + +static const uint8_t s_bptcFactors[3][16] = +{ + { 0, 21, 43, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 9, 18, 27, 37, 46, 55, 64, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }, +}; + +struct Bc6hModeInfo +{ + uint8_t transformed; + uint8_t partitionBits; + uint8_t endpointBits; + uint8_t deltaBits[3]; +}; + +static const Bc6hModeInfo s_bc6hModeInfo[] = +{ // +--------------------------- transformed + // | +------------------------ partition bits + // | | +--------------------- endpoint bits + // | | | +-------------- delta bits + { 1, 5, 10, { 5, 5, 5 } }, // 00 2-bits + { 1, 5, 7, { 6, 6, 6 } }, // 01 + { 1, 5, 11, { 5, 4, 4 } }, // 00010 5-bits + { 0, 0, 10, { 10, 10, 10 } }, // 00011 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 11, { 4, 5, 4 } }, // 00110 + { 1, 0, 11, { 9, 9, 9 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 11, { 4, 4, 5 } }, // 00010 + { 1, 0, 12, { 8, 8, 8 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 9, { 5, 5, 5 } }, // 00010 + { 1, 0, 16, { 4, 4, 4 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 8, { 6, 5, 5 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 8, { 5, 6, 5 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + 
{ 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 1, 5, 8, { 5, 5, 6 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 0, 0, { 0, 0, 0 } }, // - + { 0, 5, 6, { 6, 6, 6 } }, // 00010 + { 0, 0, 0, { 0, 0, 0 } }, // - +}; + +static uint16_t unquantize(uint16_t _value, bool _signed, uint8_t _endpointBits) +{ + const uint16_t maxValue = 1 << (_endpointBits - 1); + + if (_signed) + { + if (_endpointBits >= 16) + { + return _value; + } + + const bool sign = !!(_value & 0x8000); + _value &= 0x7fff; + + uint16_t unq; + + if (0 == _value) + { + unq = 0; + } + else if (_value >= maxValue - 1) + { + unq = 0x7fff; + } + else + { + unq = ((_value << 15) + 0x4000) >> (_endpointBits - 1); + } + + return sign ? -unq : unq; + } + + if (_endpointBits >= 15) + { + return _value; + } + + if (0 == _value) + { + return 0; + } + + if (_value == maxValue) + { + return UINT16_MAX; + } + + return ((_value << 15) + 0x4000) >> (_endpointBits - 1); +} + +static uint16_t finish_unquantize(uint16_t _value, bool _signed) +{ + if (_signed) + { + const uint16_t sign = _value & 0x8000; + _value &= 0x7fff; + + return ((_value * 31) >> 5) | sign; + } + + return (_value * 31) >> 6; +} + +static uint16_t sign_extend(uint16_t _value, uint8_t _numBits) +{ + const uint16_t mask = 1 << (_numBits - 1); + const uint16_t result = (_value ^ mask) - mask; + + return result; +} + +static inline uint8_t f32_to_u8(const float f) { + float c = roundf(f * 255); + if (c < 0) + return 0; + else if (c > 255) + return 255; + else + return c; +} + +static uint8_t half_to_u8(uint16_t h) { + return f32_to_u8(fp16_ieee_to_fp32_value(h)); +} + +static void decode_bc6_block(const uint8_t* _src, uint32_t* _dst, bool _signed) +{ + BitReader bit(_src); + + uint8_t mode = uint8_t(bit.read(2)); + + uint16_t epR[4] = { /* rw, rx, ry, rz */ }; + uint16_t epG[4] = { /* gw, gx, gy, gz */ }; + uint16_t epB[4] = { /* bw, bx, by, bz */ }; + + if (mode & 2) + { + // 5-bit mode + mode |= 
bit.read(3) << 2; + + if (0 == s_bc6hModeInfo[mode].endpointBits) + { + memset(_dst, 0, 16 * 4); + return; + } + + switch (mode) + { + case 2: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(5) << 0; + epR[0] |= bit.read(1) << 10; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(4) << 0; + epG[0] |= bit.read(1) << 10; + epB[3] |= bit.read(1) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(4) << 0; + epB[0] |= bit.read(1) << 10; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 3; + break; + + case 3: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(10) << 0; + epG[1] |= bit.read(10) << 0; + epB[1] |= bit.read(10) << 0; + break; + + case 6: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(4) << 0; + epR[0] |= bit.read(1) << 10; + epG[3] |= bit.read(1) << 4; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(5) << 0; + epG[0] |= bit.read(1) << 10; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(4) << 0; + epB[0] |= bit.read(1) << 10; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(4) << 0; + epB[3] |= bit.read(1) << 0; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(4) << 0; + epG[2] |= bit.read(1) << 4; + epB[3] |= bit.read(1) << 3; + break; + + case 7: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(9) << 0; + epR[0] |= bit.read(1) << 10; + epG[1] |= bit.read(9) << 0; + epG[0] |= bit.read(1) << 10; + epB[1] |= bit.read(9) << 0; + epB[0] |= bit.read(1) << 10; + break; + + case 10: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(4) << 0; + epR[0] |= bit.read(1) << 10; + 
epB[2] |= bit.read(1) << 4; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(4) << 0; + epG[0] |= bit.read(1) << 10; + epB[3] |= bit.read(1) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(5) << 0; + epB[0] |= bit.read(1) << 10; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(4) << 0; + epB[3] |= bit.read(1) << 1; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(4) << 0; + epB[3] |= bit.read(1) << 4; + epB[3] |= bit.read(1) << 3; + break; + + case 11: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(8) << 0; + epR[0] |= bit.read(1) << 11; + epR[0] |= bit.read(1) << 10; + epG[1] |= bit.read(8) << 0; + epG[0] |= bit.read(1) << 11; + epG[0] |= bit.read(1) << 10; + epB[1] |= bit.read(8) << 0; + epB[0] |= bit.read(1) << 11; + epB[0] |= bit.read(1) << 10; + break; + + case 14: + epR[0] |= bit.read(9) << 0; + epB[2] |= bit.read(1) << 4; + epG[0] |= bit.read(9) << 0; + epG[2] |= bit.read(1) << 4; + epB[0] |= bit.read(9) << 0; + epB[3] |= bit.read(1) << 4; + epR[1] |= bit.read(5) << 0; + epG[3] |= bit.read(1) << 4; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 3; + break; + + case 15: + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(4) << 0; + epR[0] |= bit.read(1) << 15; + epR[0] |= bit.read(1) << 14; + epR[0] |= bit.read(1) << 13; + epR[0] |= bit.read(1) << 12; + epR[0] |= bit.read(1) << 11; + epR[0] |= bit.read(1) << 10; + epG[1] |= bit.read(4) << 0; + epG[0] |= bit.read(1) << 15; + epG[0] |= bit.read(1) << 14; + epG[0] |= bit.read(1) << 13; + epG[0] |= bit.read(1) << 12; + epG[0] |= bit.read(1) << 11; + epG[0] |= bit.read(1) << 10; + epB[1] |= 
bit.read(4) << 0; + epB[0] |= bit.read(1) << 15; + epB[0] |= bit.read(1) << 14; + epB[0] |= bit.read(1) << 13; + epB[0] |= bit.read(1) << 12; + epB[0] |= bit.read(1) << 11; + epB[0] |= bit.read(1) << 10; + break; + + case 18: + epR[0] |= bit.read(8) << 0; + epG[3] |= bit.read(1) << 4; + epB[2] |= bit.read(1) << 4; + epG[0] |= bit.read(8) << 0; + epB[3] |= bit.read(1) << 2; + epG[2] |= bit.read(1) << 4; + epB[0] |= bit.read(8) << 0; + epB[3] |= bit.read(1) << 3; + epB[3] |= bit.read(1) << 4; + epR[1] |= bit.read(6) << 0; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(6) << 0; + epR[3] |= bit.read(6) << 0; + break; + + case 22: + epR[0] |= bit.read(8) << 0; + epB[3] |= bit.read(1) << 0; + epB[2] |= bit.read(1) << 4; + epG[0] |= bit.read(8) << 0; + epG[2] |= bit.read(1) << 5; + epG[2] |= bit.read(1) << 4; + epB[0] |= bit.read(8) << 0; + epG[3] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 4; + epR[1] |= bit.read(5) << 0; + epG[3] |= bit.read(1) << 4; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(6) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 3; + break; + + case 26: + epR[0] |= bit.read(8) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(1) << 4; + epG[0] |= bit.read(8) << 0; + epB[2] |= bit.read(1) << 5; + epG[2] |= bit.read(1) << 4; + epB[0] |= bit.read(8) << 0; + epB[3] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 4; + epR[1] |= bit.read(5) << 0; + epG[3] |= bit.read(1) << 4; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(6) << 0; + epB[2] |= bit.read(4) << 0; + epR[2] |= 
bit.read(5) << 0; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 3; + break; + + case 30: + epR[0] |= bit.read(6) << 0; + epG[3] |= bit.read(1) << 4; + epB[3] |= bit.read(1) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(1) << 4; + epG[0] |= bit.read(6) << 0; + epG[2] |= bit.read(1) << 5; + epB[2] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 2; + epG[2] |= bit.read(1) << 4; + epB[0] |= bit.read(6) << 0; + epG[3] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 3; + epB[3] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 4; + epR[1] |= bit.read(6) << 0; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(6) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(6) << 0; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(6) << 0; + epR[3] |= bit.read(6) << 0; + break; + + default: + break; + } + } + else + { + switch (mode) + { + case 0: + epG[2] |= bit.read(1) << 4; + epB[2] |= bit.read(1) << 4; + epB[3] |= bit.read(1) << 4; + epR[0] |= bit.read(10) << 0; + epG[0] |= bit.read(10) << 0; + epB[0] |= bit.read(10) << 0; + epR[1] |= bit.read(5) << 0; + epG[3] |= bit.read(1) << 4; + epG[2] |= bit.read(4) << 0; + epG[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 2; + epR[3] |= bit.read(5) << 0; + epB[3] |= bit.read(1) << 3; + break; + + case 1: + epG[2] |= bit.read(1) << 5; + epG[3] |= bit.read(1) << 4; + epG[3] |= bit.read(1) << 5; + epR[0] |= bit.read(7) << 0; + epB[3] |= bit.read(1) << 0; + epB[3] |= bit.read(1) << 1; + epB[2] |= bit.read(1) << 4; + epG[0] |= bit.read(7) << 0; + epB[2] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 2; + epG[2] |= bit.read(1) << 4; + epB[0] |= bit.read(7) << 0; + epB[3] |= bit.read(1) << 3; + epB[3] |= bit.read(1) << 5; + epB[3] |= bit.read(1) << 4; + epR[1] |= bit.read(6) << 0; + epG[2] |= bit.read(4) 
<< 0; + epG[1] |= bit.read(6) << 0; + epG[3] |= bit.read(4) << 0; + epB[1] |= bit.read(6) << 0; + epB[2] |= bit.read(4) << 0; + epR[2] |= bit.read(6) << 0; + epR[3] |= bit.read(6) << 0; + break; + + default: + break; + } + } + + const Bc6hModeInfo mi = s_bc6hModeInfo[mode]; + + if (_signed) + { + epR[0] = sign_extend(epR[0], mi.endpointBits); + epG[0] = sign_extend(epG[0], mi.endpointBits); + epB[0] = sign_extend(epB[0], mi.endpointBits); + } + + const uint8_t numSubsets = !!mi.partitionBits + 1; + + for (uint8_t ii = 1, num = numSubsets * 2; ii < num; ++ii) + { + if (_signed + || mi.transformed) + { + epR[ii] = sign_extend(epR[ii], mi.deltaBits[0]); + epG[ii] = sign_extend(epG[ii], mi.deltaBits[1]); + epB[ii] = sign_extend(epB[ii], mi.deltaBits[2]); + } + + if (mi.transformed) + { + const uint16_t mask = (1 << mi.endpointBits) - 1; + + epR[ii] = (epR[ii] + epR[0]) & mask; + epG[ii] = (epG[ii] + epG[0]) & mask; + epB[ii] = (epB[ii] + epB[0]) & mask; + + if (_signed) + { + epR[ii] = sign_extend(epR[ii], mi.endpointBits); + epG[ii] = sign_extend(epG[ii], mi.endpointBits); + epB[ii] = sign_extend(epB[ii], mi.endpointBits); + } + } + } + + for (uint8_t ii = 0, num = numSubsets * 2; ii < num; ++ii) + { + epR[ii] = unquantize(epR[ii], _signed, mi.endpointBits); + epG[ii] = unquantize(epG[ii], _signed, mi.endpointBits); + epB[ii] = unquantize(epB[ii], _signed, mi.endpointBits); + } + + const uint8_t partitionSetIdx = uint8_t(mi.partitionBits ? bit.read(5) : 0); + const uint8_t indexBits = mi.partitionBits ? 3 : 4; + const uint8_t* factors = s_bptcFactors[indexBits - 2]; + + for (uint8_t yy = 0; yy < 4; ++yy) + { + for (uint8_t xx = 0; xx < 4; ++xx) + { + const uint8_t idx = yy * 4 + xx; + + uint8_t subsetIndex = 0; + uint8_t indexAnchor = 0; + + if (0 != mi.partitionBits) + { + subsetIndex = (s_bptcP2[partitionSetIdx] >> idx) & 1; + indexAnchor = subsetIndex ? 
s_bptcA2[partitionSetIdx] : 0; + } + + const uint8_t anchor = idx == indexAnchor; + const uint8_t num = indexBits - anchor; + const uint8_t index = (uint8_t)bit.read(num); + + const uint8_t fc = factors[index]; + const uint8_t fca = 64 - fc; + const uint8_t fcb = fc; + + subsetIndex *= 2; + uint16_t rr = finish_unquantize((epR[subsetIndex] * fca + epR[subsetIndex + 1] * fcb + 32) >> 6, _signed); + uint16_t gg = finish_unquantize((epG[subsetIndex] * fca + epG[subsetIndex + 1] * fcb + 32) >> 6, _signed); + uint16_t bb = finish_unquantize((epB[subsetIndex] * fca + epB[subsetIndex + 1] * fcb + 32) >> 6, _signed); + + _dst[idx] = color(half_to_u8(rr), half_to_u8(gg), half_to_u8(bb), 255); + } + } +} + +int decode_bc6(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image) { + uint32_t m_block_width = 4; + uint32_t m_block_height = 4; + uint32_t m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + uint32_t m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + uint32_t buffer[16]; + for (uint32_t by = 0; by < m_blocks_y; by++) { + for (uint32_t bx = 0; bx < m_blocks_x; bx++, data += 16) { + decode_bc6_block(data, buffer, false); + copy_block_buffer(bx, by, m_width, m_height, m_block_width, m_block_height, buffer, image); + } + } + return 1; +} + +static const uint32_t s_bptcP3[] = +{ // 76543210 0000 1111 2222 3333 4444 5555 6666 7777 + 0xaa685050, // 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 + 0x6a5a5040, // 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 + 0x5a5a4200, // 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 + 0x5450a0a8, // 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 + 0xa5a50000, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 + 0xa0a05050, // 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 + 0x5555a0a0, // 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 + 0x5a5a5050, // 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 + 0xaa550000, // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 + 0xaa555500, // 0, 0, 0, 0, 1, 1, 
1, 1, 1, 1, 1, 1, 2, 2, 2, 2 + 0xaaaa5500, // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 + 0x90909090, // 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 + 0x94949494, // 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 + 0xa4a4a4a4, // 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 + 0xa9a59450, // 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 + 0x2a0a4250, // 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 + 0xa5945040, // 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 + 0x0a425054, // 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 + 0xa5a5a500, // 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 + 0x55a0a0a0, // 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 + 0xa8a85454, // 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 + 0x6a6a4040, // 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 + 0xa4a45000, // 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 + 0x1a1a0500, // 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 + 0x0050a4a4, // 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 + 0xaaa59090, // 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 + 0x14696914, // 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 + 0x69691400, // 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 + 0xa08585a0, // 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 + 0xaa821414, // 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 + 0x50a4a450, // 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 + 0x6a5a0200, // 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 + 0xa9a58000, // 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 + 0x5090a0a8, // 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 + 0xa8a09050, // 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 + 0x24242424, // 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 + 0x00aa5500, // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 + 0x24924924, // 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 + 0x24499224, // 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 + 0x50a50a50, // 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 + 0x500aa550, // 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 
0, 0, 0, 1, 1 + 0xaaaa4444, // 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 + 0x66660000, // 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 + 0xa5a0a5a0, // 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 + 0x50a050a0, // 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 + 0x69286928, // 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 + 0x44aaaa44, // 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 + 0x66666600, // 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 + 0xaa444444, // 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 + 0x54a854a8, // 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 + 0x95809580, // 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 + 0x96969600, // 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 + 0xa85454a8, // 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 + 0x80959580, // 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 + 0xaa141414, // 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 + 0x96960000, // 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 + 0xaaaa1414, // 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 + 0xa05050a0, // 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 + 0xa0a5a5a0, // 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 + 0x96000000, // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 + 0x40804080, // 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 + 0xa9a8a9a8, // 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 + 0xaaaaaa44, // 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 + 0x2a4a5254, // 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 +}; + +static const uint8_t s_bptcA3[2][64] = +{ + { + 3, 3, 15, 15, 8, 3, 15, 15, + 8, 8, 6, 6, 6, 5, 3, 3, + 3, 3, 8, 15, 3, 3, 6, 10, + 5, 8, 8, 6, 8, 5, 15, 15, + 8, 15, 3, 5, 6, 10, 8, 15, + 15, 3, 15, 5, 15, 15, 15, 15, + 3, 15, 5, 5, 5, 8, 5, 10, + 5, 10, 8, 13, 15, 12, 3, 3, + }, + { + 15, 8, 8, 3, 15, 15, 3, 8, + 15, 15, 15, 15, 15, 15, 15, 8, + 15, 8, 15, 3, 15, 8, 15, 8, + 3, 15, 6, 10, 15, 15, 10, 8, + 15, 3, 15, 10, 10, 8, 9, 10, + 6, 15, 8, 15, 3, 6, 6, 8, + 15, 3, 15, 15, 15, 15, 15, 15, + 15, 
15, 15, 15, 3, 15, 15, 8, + }, +}; + +struct Bc7ModeInfo +{ + uint8_t numSubsets; + uint8_t partitionBits; + uint8_t rotationBits; + uint8_t indexSelectionBits; + uint8_t colorBits; + uint8_t alphaBits; + uint8_t endpointPBits; + uint8_t sharedPBits; + uint8_t indexBits[2]; +}; + +static const Bc7ModeInfo s_bp7ModeInfo[] = +{ // +---------------------------- num subsets + // | +------------------------- partition bits + // | | +---------------------- rotation bits + // | | | +------------------- index selection bits + // | | | | +---------------- color bits + // | | | | | +------------- alpha bits + // | | | | | | +---------- endpoint P-bits + // | | | | | | | +------- shared P-bits + // | | | | | | | | +-- 2x index bits + { 3, 4, 0, 0, 4, 0, 1, 0, { 3, 0 } }, // 0 + { 2, 6, 0, 0, 6, 0, 0, 1, { 3, 0 } }, // 1 + { 3, 6, 0, 0, 5, 0, 0, 0, { 2, 0 } }, // 2 + { 2, 6, 0, 0, 7, 0, 1, 0, { 2, 0 } }, // 3 + { 1, 0, 2, 1, 5, 6, 0, 0, { 2, 3 } }, // 4 + { 1, 0, 2, 0, 7, 8, 0, 0, { 2, 2 } }, // 5 + { 1, 0, 0, 0, 7, 7, 1, 0, { 4, 0 } }, // 6 + { 2, 6, 0, 0, 5, 5, 1, 0, { 2, 0 } }, // 7 +}; + +static uint8_t expand_quantized(uint8_t v, int bits) { + v = v << (8 - bits); + return v | (v >> bits); +} + +static void decode_bc7_block(const uint8_t* _src, uint32_t* _dst) +{ + BitReader bit(_src); + + uint8_t mode = 0; + for (; mode < 8 && 0 == bit.read(1); ++mode) + { + } + + if (mode == 8) + { + memset(_dst, 0, 16 * 4); + return; + } + + const Bc7ModeInfo& mi = s_bp7ModeInfo[mode]; + const uint8_t modePBits = 0 != mi.endpointPBits + ? 
mi.endpointPBits + : mi.sharedPBits + ; + + const uint8_t partitionSetIdx = uint8_t(bit.read(mi.partitionBits)); + const uint8_t rotationMode = uint8_t(bit.read(mi.rotationBits)); + const uint8_t indexSelectionMode = uint8_t(bit.read(mi.indexSelectionBits)); + + uint8_t epR[6]; + uint8_t epG[6]; + uint8_t epB[6]; + uint8_t epA[6]; + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epR[ii * 2 + 0] = uint8_t(bit.read(mi.colorBits) << modePBits); + epR[ii * 2 + 1] = uint8_t(bit.read(mi.colorBits) << modePBits); + } + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epG[ii * 2 + 0] = uint8_t(bit.read(mi.colorBits) << modePBits); + epG[ii * 2 + 1] = uint8_t(bit.read(mi.colorBits) << modePBits); + } + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epB[ii * 2 + 0] = uint8_t(bit.read(mi.colorBits) << modePBits); + epB[ii * 2 + 1] = uint8_t(bit.read(mi.colorBits) << modePBits); + } + + if (mi.alphaBits) + { + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epA[ii * 2 + 0] = uint8_t(bit.read(mi.alphaBits) << modePBits); + epA[ii * 2 + 1] = uint8_t(bit.read(mi.alphaBits) << modePBits); + } + } + else + { + memset(epA, 0xff, 6); + } + + if (0 != modePBits) + { + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + const uint8_t pda = uint8_t(bit.read(modePBits)); + const uint8_t pdb = uint8_t(0 == mi.sharedPBits ? 
bit.read(modePBits) : pda); + + epR[ii * 2 + 0] |= pda; + epR[ii * 2 + 1] |= pdb; + epG[ii * 2 + 0] |= pda; + epG[ii * 2 + 1] |= pdb; + epB[ii * 2 + 0] |= pda; + epB[ii * 2 + 1] |= pdb; + epA[ii * 2 + 0] |= pda; + epA[ii * 2 + 1] |= pdb; + } + } + + const uint8_t colorBits = mi.colorBits + modePBits; + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epR[ii * 2 + 0] = expand_quantized(epR[ii * 2 + 0], colorBits); + epR[ii * 2 + 1] = expand_quantized(epR[ii * 2 + 1], colorBits); + epG[ii * 2 + 0] = expand_quantized(epG[ii * 2 + 0], colorBits); + epG[ii * 2 + 1] = expand_quantized(epG[ii * 2 + 1], colorBits); + epB[ii * 2 + 0] = expand_quantized(epB[ii * 2 + 0], colorBits); + epB[ii * 2 + 1] = expand_quantized(epB[ii * 2 + 1], colorBits); + } + + if (mi.alphaBits) + { + const uint8_t alphaBits = mi.alphaBits + modePBits; + + for (uint8_t ii = 0; ii < mi.numSubsets; ++ii) + { + epA[ii * 2 + 0] = expand_quantized(epA[ii * 2 + 0], alphaBits); + epA[ii * 2 + 1] = expand_quantized(epA[ii * 2 + 1], alphaBits); + } + } + + const bool hasIndexBits1 = 0 != mi.indexBits[1]; + + const uint8_t* factors[] = + { + s_bptcFactors[mi.indexBits[0] - 2], + hasIndexBits1 ? s_bptcFactors[mi.indexBits[1] - 2] : factors[0], + }; + + uint16_t offset[2] = + { + 0, + uint16_t(mi.numSubsets * (16 * mi.indexBits[0] - 1)), + }; + + for (uint8_t yy = 0; yy < 4; ++yy) + { + for (uint8_t xx = 0; xx < 4; ++xx) + { + const uint8_t idx = yy * 4 + xx; + + uint8_t subsetIndex = 0; + uint8_t indexAnchor = 0; + switch (mi.numSubsets) + { + case 2: + subsetIndex = (s_bptcP2[partitionSetIdx] >> idx) & 1; + indexAnchor = 0 != subsetIndex ? s_bptcA2[partitionSetIdx] : 0; + break; + + case 3: + subsetIndex = (s_bptcP3[partitionSetIdx] >> (2 * idx)) & 3; + indexAnchor = 0 != subsetIndex ? 
s_bptcA3[subsetIndex - 1][partitionSetIdx] : 0; + break; + + default: + break; + } + + const uint8_t anchor = idx == indexAnchor; + const uint8_t num[2] = + { + uint8_t(mi.indexBits[0] - anchor), + uint8_t(hasIndexBits1 ? mi.indexBits[1] - anchor : 0), + }; + + const uint8_t index[2] = + { + (uint8_t)bit.peek(offset[0], num[0]), + hasIndexBits1 ? (uint8_t)bit.peek(offset[1], num[1]) : index[0], + }; + + offset[0] += num[0]; + offset[1] += num[1]; + + const uint8_t fc = factors[indexSelectionMode][index[indexSelectionMode]]; + const uint8_t fa = factors[!indexSelectionMode][index[!indexSelectionMode]]; + + const uint8_t fca = 64 - fc; + const uint8_t fcb = fc; + const uint8_t faa = 64 - fa; + const uint8_t fab = fa; + + subsetIndex *= 2; + uint8_t rr = uint8_t(uint16_t(epR[subsetIndex] * fca + epR[subsetIndex + 1] * fcb + 32) >> 6); + uint8_t gg = uint8_t(uint16_t(epG[subsetIndex] * fca + epG[subsetIndex + 1] * fcb + 32) >> 6); + uint8_t bb = uint8_t(uint16_t(epB[subsetIndex] * fca + epB[subsetIndex + 1] * fcb + 32) >> 6); + uint8_t aa = uint8_t(uint16_t(epA[subsetIndex] * faa + epA[subsetIndex + 1] * fab + 32) >> 6); + + switch (rotationMode) + { + case 1: std::swap(aa, rr); break; + case 2: std::swap(aa, gg); break; + case 3: std::swap(aa, bb); break; + default: break; + }; + + _dst[idx] = color(rr, gg, bb, aa); + } + } +} + +int decode_bc7(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image) { + uint32_t m_block_width = 4; + uint32_t m_block_height = 4; + uint32_t m_blocks_x = (m_width + m_block_width - 1) / m_block_width; + uint32_t m_blocks_y = (m_height + m_block_height - 1) / m_block_height; + uint32_t buffer[16]; + for (uint32_t by = 0; by < m_blocks_y; by++) { + for (uint32_t bx = 0; bx < m_blocks_x; bx++, data += 16) { + decode_bc7_block(data, buffer); + copy_block_buffer(bx, by, m_width, m_height, m_block_width, m_block_height, buffer, image); + } + } + return 1; +} \ No newline at end of file diff --git a/Texture2DDecoder/bcn.h 
b/Texture2DDecoder/bcn.h new file mode 100644 index 0000000..bfcdfde --- /dev/null +++ b/Texture2DDecoder/bcn.h @@ -0,0 +1,20 @@ +#pragma once +#include + +struct color_bgra +{ + uint8_t b; + uint8_t g; + uint8_t r; + uint8_t a; +}; + +const color_bgra g_black_color{ 0, 0, 0, 255 }; + +int decode_bc1(const uint8_t* data, const long w, const long h, uint32_t* image); +void decode_bc3_alpha(const uint8_t* data, uint32_t* outbuf, int channel); +int decode_bc3(const uint8_t* data, const long w, const long h, uint32_t* image); +int decode_bc4(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image); +int decode_bc5(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image); +int decode_bc6(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image); +int decode_bc7(const uint8_t* data, uint32_t m_width, uint32_t m_height, uint32_t* image); \ No newline at end of file diff --git a/Texture2DDecoder/color.h b/Texture2DDecoder/color.h new file mode 100644 index 0000000..685f87e --- /dev/null +++ b/Texture2DDecoder/color.h @@ -0,0 +1,87 @@ +#ifndef COLOR_H +#define COLOR_H + +#include +#include +#include "endianness.h" + +#ifdef __LITTLE_ENDIAN__ +static const uint_fast32_t TRANSPARENT_MASK = 0x00ffffff; +#else +static const uint_fast32_t TRANSPARENT_MASK = 0xffffff00; +#endif + +static inline uint_fast32_t color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { +#ifdef __LITTLE_ENDIAN__ + return b | g << 8 | r << 16 | a << 24; +#else + return a | r << 8 | g << 16 | b << 24; +#endif +} + +static inline uint_fast32_t alpha_mask(uint8_t a) { +#ifdef __LITTLE_ENDIAN__ + return TRANSPARENT_MASK | a << 24; +#else + return TRANSPARENT_MASK | a; +#endif +} + +static inline void rgb565_le(const uint16_t d, uint8_t *r, uint8_t *g, uint8_t *b) { +#ifdef __LITTLE_ENDIAN__ + *r = (d >> 8 & 0xf8) | (d >> 13); + *g = (d >> 3 & 0xfc) | (d >> 9 & 3); + *b = (d << 3) | (d >> 2 & 7); +#else + *r = (d & 0xf8) | (d >> 5 & 7); + *g = (d << 5 & 0xe0) | 
(d >> 11 & 0x1c) | (d >> 1 & 3); + *b = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void rgb565_be(const uint16_t d, uint8_t *r, uint8_t *g, uint8_t *b) { +#ifdef __BIG_ENDIAN__ + *r = (d >> 8 & 0xf8) | (d >> 13); + *g = (d >> 3 & 0xfc) | (d >> 9 & 3); + *b = (d << 3) | (d >> 2 & 7); +#else + *r = (d & 0xf8) | (d >> 5 & 7); + *g = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *b = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void rgb565_lep(const uint16_t d, uint8_t *c) { +#ifdef __LITTLE_ENDIAN__ + *(c++) = (d >> 8 & 0xf8) | (d >> 13); + *(c++) = (d >> 3 & 0xfc) | (d >> 9 & 3); + *(c++) = (d << 3) | (d >> 2 & 7); +#else + *(c++) = (d & 0xf8) | (d >> 5 & 7); + *(c++) = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *(c++) = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void rgb565_bep(const uint16_t d, uint8_t *c) { +#ifdef __BIG_ENDIAN__ + *(c++) = (d >> 8 & 0xf8) | (d >> 13); + *(c++) = (d >> 3 & 0xfc) | (d >> 9 & 3); + *(c++) = (d << 3) | (d >> 2 & 7); +#else + *(c++) = (d & 0xf8) | (d >> 5 & 7); + *(c++) = (d << 5 & 0xe0) | (d >> 11 & 0x1c) | (d >> 1 & 3); + *(c++) = (d >> 5 & 0xf8) | (d >> 10 & 0x7); +#endif +} + +static inline void copy_block_buffer(const long bx, const long by, const long w, const long h, const long bw, + const long bh, const uint32_t *buffer, uint32_t *image) { + long x = bw * bx; + long xl = (bw * (bx + 1) > w ? 
w - bw * bx : bw) * 4; + const uint32_t *buffer_end = buffer + bw * bh; + for (long y = by * bh; buffer < buffer_end && y < h; buffer += bw, y++) + memcpy(image + y * w + x, buffer, xl); +} + +#endif /* end of include guard: COLOR_H */ diff --git a/Texture2DDecoder/crunch.cpp b/Texture2DDecoder/crunch.cpp new file mode 100644 index 0000000..714b852 --- /dev/null +++ b/Texture2DDecoder/crunch.cpp @@ -0,0 +1,34 @@ +#include "crunch.h" +#include +#include +#include "crunch/crn_decomp.h" + +bool crunch_unpack_level(const uint8_t* data, uint32_t data_size, uint32_t level_index, void** ret, uint32_t* ret_size) { + crnd::crn_texture_info tex_info; + if (!crnd::crnd_get_texture_info(data, data_size, &tex_info)) + { + return false; + } + + crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(data, data_size); + if (!pContext) + { + return false; + } + + const crn_uint32 width = std::max(1U, tex_info.m_width >> level_index); + const crn_uint32 height = std::max(1U, tex_info.m_height >> level_index); + const crn_uint32 blocks_x = std::max(1U, (width + 3) >> 2); + const crn_uint32 blocks_y = std::max(1U, (height + 3) >> 2); + const crn_uint32 row_pitch = blocks_x * crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); + const crn_uint32 total_face_size = row_pitch * blocks_y; + *ret = new uint8_t[total_face_size]; + *ret_size = total_face_size; + if (!crnd::crnd_unpack_level(pContext, ret, total_face_size, row_pitch, level_index)) + { + crnd::crnd_unpack_end(pContext); + return false; + } + crnd::crnd_unpack_end(pContext); + return true; +} \ No newline at end of file diff --git a/Texture2DDecoder/crunch.h b/Texture2DDecoder/crunch.h new file mode 100644 index 0000000..21ac55c --- /dev/null +++ b/Texture2DDecoder/crunch.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +bool crunch_unpack_level(const uint8_t* data, uint32_t data_size, uint32_t level_index, void** ret, uint32_t* ret_size); \ No newline at end of file diff --git a/Texture2DDecoder/crunch/crn_decomp.h 
b/Texture2DDecoder/crunch/crn_decomp.h new file mode 100644 index 0000000..53311cd --- /dev/null +++ b/Texture2DDecoder/crunch/crn_decomp.h @@ -0,0 +1,4843 @@ +// File: crn_decomp.h - Fast CRN->DXTc texture transcoder header file library +// Copyright (c) 2010-2016 Richard Geldreich, Jr. All rights reserved. +// See Copyright Notice and license at the end of this file. +// +// This single header file contains *all* of the code necessary to unpack .CRN files to raw DXTn bits. +// It does NOT depend on the crn compression library. +// +// Note: This is a single file, stand-alone C++ library which is controlled by the use of two macros: +// If CRND_INCLUDE_CRND_H is NOT defined, the header is included. +// If CRND_HEADER_FILE_ONLY is NOT defined, the implementation is included. +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +#ifndef CRND_INCLUDE_CRND_H +#define CRND_INCLUDE_CRND_H + +// Include crnlib.h (only to bring in some basic CRN-related types). +#include "crnlib.h" + +#define CRND_LIB_VERSION 104 +#define CRND_VERSION_STRING "01.04" + +#ifdef _DEBUG +#define CRND_BUILD_DEBUG +#else +#define CRND_BUILD_RELEASE +#endif + +// CRN decompression API +namespace crnd +{ + typedef unsigned char uint8; + typedef signed char int8; + typedef unsigned short uint16; + typedef signed short int16; + typedef unsigned int uint32; + typedef uint32 uint32; + typedef unsigned int uint; + typedef signed int int32; + #ifdef __GNUC__ + typedef unsigned long long uint64; + typedef long long int64; + #else + typedef unsigned __int64 uint64; + typedef signed __int64 int64; + #endif + + // The crnd library assumes all allocation blocks have at least CRND_MIN_ALLOC_ALIGNMENT alignment. + const uint32 CRND_MIN_ALLOC_ALIGNMENT = sizeof(uint32) * 2U; + + // realloc callback: + // Used to allocate, resize, or free memory blocks. + // If p is NULL, the realloc function attempts to allocate a block of at least size bytes. 
Returns NULL on out of memory. + // *pActual_size must be set to the actual size of the allocated block, which must be greater than or equal to the requested size. + // If p is not NULL, and size is 0, the realloc function frees the specified block, and always returns NULL. *pActual_size should be set to 0. + // If p is not NULL, and size is non-zero, the realloc function attempts to resize the specified block: + // If movable is false, the realloc function attempts to shrink or expand the block in-place. NULL is returned if the block cannot be resized in place, or if the + // underlying heap implementation doesn't support in-place resizing. Otherwise, the pointer to the original block is returned. + // If movable is true, it is permissible to move the block's contents if it cannot be resized in place. NULL is returned if the block cannot be resized in place, and there + // is not enough memory to relocate the block. + // In all cases, *pActual_size must be set to the actual size of the allocated block, whether it was successfully resized or not. + typedef void* (*crnd_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); + + // msize callback: Returns the size of the memory block in bytes, or 0 if the pointer or block is invalid. + typedef size_t (*crnd_msize_func)(void* p, void* pUser_data); + + // crnd_set_memory_callbacks() - Use to override the crnd library's memory allocation functions. + // If any input parameters are NULL, the memory callback functions are reset to the default functions. + // The default functions call malloc(), free(), _msize(), _expand(), etc. 
+ void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data); + + struct crn_file_info // output record of crnd_validate_file() + { + inline crn_file_info() : m_struct_size(sizeof(crn_file_info)) { } // pre-sets m_struct_size, which crnd_validate_file() requires to be set + + uint32 m_struct_size; + uint32 m_actual_data_size; + uint32 m_header_size; + uint32 m_total_palette_size; + uint32 m_tables_size; + uint32 m_levels; + uint32 m_level_compressed_size[cCRNMaxLevels]; + uint32 m_color_endpoint_palette_entries; + uint32 m_color_selector_palette_entries; + uint32 m_alpha_endpoint_palette_entries; + uint32 m_alpha_selector_palette_entries; + }; + + struct crn_texture_info // output record of crnd_get_texture_info() + { + inline crn_texture_info() : m_struct_size(sizeof(crn_texture_info)) { } // pre-sets m_struct_size, which crnd_get_texture_info() requires to be set + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_levels; + uint32 m_faces; + uint32 m_bytes_per_block; + uint32 m_userdata0; + uint32 m_userdata1; + crn_format m_format; + }; + + struct crn_level_info // output record of crnd_get_level_info() + { + inline crn_level_info() : m_struct_size(sizeof(crn_level_info)) { } // pre-sets m_struct_size, which crnd_get_level_info() requires to be set + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_faces; + uint32 m_blocks_x; + uint32 m_blocks_y; + uint32 m_bytes_per_block; + crn_format m_format; + }; + + // Returns the FOURCC format code corresponding to the specified CRN format. + uint32 crnd_crn_format_to_fourcc(crn_format fmt); + + // Returns the fundamental GPU format given a potentially swizzled DXT5 crn_format. + crn_format crnd_get_fundamental_dxt_format(crn_format fmt); + + // Returns the size of the crn_format in bits/texel (either 4 or 8). + uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt); + + // Returns the number of bytes per DXTn block (8 or 16). + uint32 crnd_get_bytes_per_dxt_block(crn_format fmt); + + // Validates the entire file by checking the header and data CRC's. + // This is not something you want to be doing much! + // The crn_file_info.m_struct_size field must be set before calling this function.
+ bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info); + + // Retrieves texture information from the CRN file. + // The crn_texture_info.m_struct_size field must be set before calling this function. + bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pTexture_info); + + // Retrieves mipmap level specific information from the CRN file. + // The crn_level_info.m_struct_size field must be set before calling this function. + bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info); + + // Transcode/unpack context handle. + typedef void* crnd_unpack_context; + + // crnd_unpack_begin() - Decompresses the texture's decoder tables and endpoint/selector palettes. + // Once you call this function, you may call crnd_unpack_level() to unpack one or more mip levels. + // Don't call this once per mip level (unless you absolutely must)! + // This function allocates enough memory to hold: Huffman decompression tables, and the endpoint/selector palettes (color and/or alpha). + // Worst case allocation is approx. 200k, assuming all palettes contain 8192 entries. + // pData must point to a buffer holding all of the compressed .CRN file data. + // This buffer must be stable until crnd_unpack_end() is called. + // Returns NULL if out of memory, or if any of the input parameters are invalid. + crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size); + + // Returns a pointer to the compressed .CRN data associated with a crnd_unpack_context. + // Returns false if any of the input parameters are invalid. + bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size); + + // crnd_unpack_level() - Transcodes the specified mipmap level to a destination buffer in cached or write combined memory. + // pContext - Context created by a call to crnd_unpack_begin(). 
+ // ppDst - A pointer to an array of 1 or 6 destination buffer pointers. Cubemaps require an array of 6 pointers, 2D textures require an array of 1 pointer. + // dst_size_in_bytes - Optional size of each destination buffer. Only used for debugging - OK to set to UINT32_MAX. + // row_pitch_in_bytes - The pitch in bytes from one row of DXT blocks to the next. Must be a multiple of 4. + // level_index - mipmap level index, where 0 is the largest/first level. + // Returns false if any of the input parameters, or the compressed stream, are invalid. + // This function does not allocate any memory. + bool crnd_unpack_level( + crnd_unpack_context pContext, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + + // crnd_unpack_level_segmented() - Unpacks the specified mipmap level from a "segmented" CRN file. + // See the crnd_create_segmented_file() API below. + // Segmented files allow the user to control where the compressed mipmap data is stored. + bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + + // crnd_unpack_end() - Frees the decompress tables and unpacked palettes associated with the specified unpack context. + // Returns false if the context is NULL, or if it points to an invalid context. + // This function frees all memory associated with the context. + bool crnd_unpack_end(crnd_unpack_context pContext); + + // The following API's allow the user to create "segmented" CRN files. A segmented file contains multiple pieces: + // - Base data: Header + compression tables + // - Level data: Individual mipmap levels + // This allows mipmap levels from multiple CRN files to be tightly packed together into single files. + + // Returns a pointer to the level's compressed data, and optionally returns the level's compressed data size if pSize is not NULL. 
+ const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize); + + // Returns the compressed size of the texture's header and compression tables (but no levels). + uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size); + + // Creates a "segmented" CRN texture from a normal CRN texture. The new texture will be created at pBase_data, and will be crnd_get_segmented_file_size() bytes long. + // base_data_size must be >= crnd_get_segmented_file_size(). + // The base data will contain the CRN header and compression tables, but no mipmap data. + bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size); + +} // namespace crnd + +// Low-level CRN file header cracking. +namespace crnd +{ + template + struct crn_packed_uint // N-byte unsigned integer kept in m_buf most-significant byte first, whatever the host byte order + { + inline crn_packed_uint() { } + + inline crn_packed_uint(unsigned int val) { *this = val; } + + inline crn_packed_uint(const crn_packed_uint& other) { *this = other; } + + inline crn_packed_uint& operator= (const crn_packed_uint& rhs) + { + if (this != &rhs) + memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); + return *this; + } + + inline crn_packed_uint& operator= (unsigned int val) + { + //CRND_ASSERT((N == 4U) || (val < (1U << (N * 8U)))); + + val <<= (8U * (4U - N)); // left-align the N significant bytes so the loop can peel them off from bit 24 + + for (unsigned int i = 0; i < N; i++) + { + m_buf[i] = static_cast(val >> 24U); + val <<= 8U; + } + + return *this; + } + + inline operator unsigned int() const + { + switch (N) + { + case 1: return m_buf[0]; + case 2: return (m_buf[0] << 8U) | m_buf[1]; + case 3: return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); + default: return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); + } + } + + unsigned char m_buf[N]; + }; + +#pragma pack(push) +#pragma pack(1) + struct crn_palette // byte-packed (pack(1)): 3-byte m_ofs, 3-byte m_size, 2-byte m_num + { + crn_packed_uint<3> m_ofs; + crn_packed_uint<3> m_size; + crn_packed_uint<2> m_num; + }; + + enum crn_header_flags + { + // If set, the compressed mipmap level data is not
located after the file's base data - it will be separately managed by the user instead. + cCRNHeaderFlagSegmented = 1 + }; + + struct crn_header // byte-packed (pack(1)) file header; every field is a crn_packed_uint or a crn_palette + { + enum { cCRNSigValue = ('H' << 8) | 'x' }; // presumably the value expected in m_sig - verify against the header validation code + + crn_packed_uint<2> m_sig; + crn_packed_uint<2> m_header_size; + crn_packed_uint<2> m_header_crc16; + + crn_packed_uint<4> m_data_size; + crn_packed_uint<2> m_data_crc16; + + crn_packed_uint<2> m_width; + crn_packed_uint<2> m_height; + + crn_packed_uint<1> m_levels; + crn_packed_uint<1> m_faces; + + crn_packed_uint<1> m_format; + crn_packed_uint<2> m_flags; + + crn_packed_uint<4> m_reserved; + crn_packed_uint<4> m_userdata0; + crn_packed_uint<4> m_userdata1; + + crn_palette m_color_endpoints; + crn_palette m_color_selectors; + + crn_palette m_alpha_endpoints; + crn_palette m_alpha_selectors; + + crn_packed_uint<2> m_tables_size; + crn_packed_uint<3> m_tables_ofs; + + // m_level_ofs[] is actually an array of offsets: m_level_ofs[m_levels] + crn_packed_uint<4> m_level_ofs[1]; + }; + + const unsigned int cCRNHeaderMinSize = 62U; + +#pragma pack(pop) + +} // namespace crnd + +#endif // CRND_INCLUDE_CRND_H + +// Internal library source follows this line.
+ +#ifndef CRND_HEADER_FILE_ONLY + +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include +#include // needed for placement new, _msize, _expand + +#define CRND_RESTRICT __restrict + +#ifdef _MSC_VER +#include +#pragma intrinsic(_WriteBarrier) +#pragma intrinsic(_ReadWriteBarrier) +#define CRND_WRITE_BARRIER _WriteBarrier(); +#define CRND_FULL_BARRIER _ReadWriteBarrier(); +#else +#define CRND_WRITE_BARRIER +#define CRND_FULL_BARRIER +#endif + +#ifdef _MSC_VER +#pragma warning(disable:4127) // warning C4127: conditional expression is constant +#endif + +#ifdef CRND_DEVEL +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x500 +#endif +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef NOMINMAX // the guard must name the macro: a bare #ifndef is ill-formed as soon as CRND_DEVEL enables this section +#define NOMINMAX // stops windows.h from defining min()/max() macros +#endif +#include "windows.h" // only for IsDebuggerPresent(), DebugBreak(), and OutputDebugStringA() +#endif + +// File: crnd_types.h +namespace crnd +{ + const crn_uint8 cUINT8_MIN = 0; + const crn_uint8 cUINT8_MAX = 0xFFU; + const uint16 cUINT16_MIN = 0; + const uint16 cUINT16_MAX = 0xFFFFU; + const uint32 cUINT32_MIN = 0; + const uint32 cUINT32_MAX = 0xFFFFFFFFU; + + const int8 cINT8_MIN = -128; + const int8 cINT8_MAX = 127; + const int16 cINT16_MIN = -32768; + const int16 cINT16_MAX = 32767; + const int32 cINT32_MIN = (-2147483647 - 1); + const int32 cINT32_MAX = 2147483647; + + enum eClear { cClear }; + + const uint32 cIntBits = 32U; + +#ifdef _WIN64 + typedef uint64 ptr_bits; +#else + #ifdef __x86_64__ + typedef uint64 ptr_bits; + #else + typedef uint32 ptr_bits; // NOTE(review): every target without _WIN64/__x86_64__ lands here, including other 64-bit ABIs - confirm ptr_bits never has to hold a full pointer there + #endif +#endif + + template struct int_traits { enum { cMin = crnd::cINT32_MIN, cMax = crnd::cINT32_MAX, cSigned = true }; }; + + template<> struct int_traits { enum { cMin = crnd::cINT8_MIN, cMax = crnd::cINT8_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = crnd::cINT16_MIN, cMax = crnd::cINT16_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = crnd::cINT32_MIN, cMax = crnd::cINT32_MAX, cSigned = true }; }; + +
template<> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT8_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT16_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT32_MAX, cSigned = false }; }; + + struct empty_type { }; + +} // namespace crnd + +// File: crnd_platform.h +namespace crnd +{ +#ifdef _XBOX + const bool c_crnd_little_endian_platform = false; + const bool c_crnd_big_endian_platform = true; +#define CRND_BIG_ENDIAN_PLATFORM 1 +#else + const bool c_crnd_little_endian_platform = true; + const bool c_crnd_big_endian_platform = false; +#endif + + bool crnd_is_debugger_present(); + void crnd_debug_break(); + void crnd_output_debug_string(const char* p); + + // actually in crnd_assert.cpp + void crnd_assert(const char* pExp, const char* pFile, unsigned line); + void crnd_fail(const char* pExp, const char* pFile, unsigned line); + +} // namespace crnd + +// File: crnd_assert.h +namespace crnd +{ + void crnd_assert(const char* pExp, const char* pFile, unsigned line); + +#ifdef NDEBUG +#define CRND_ASSERT(x) ((void)0) +#undef CRND_ASSERTS_ENABLED +#else +#define CRND_ASSERT(_exp) (void)( (!!(_exp)) || (crnd::crnd_assert(#_exp, __FILE__, __LINE__), 0) ) +#define CRND_ASSERTS_ENABLED +#endif + + void crnd_trace(const char* pFmt, va_list args); + void crnd_trace(const char* pFmt, ...); + +} // namespace crnd + +// File: crnd_helpers.h +namespace crnd +{ + namespace helpers + { + template struct rel_ops + { + friend bool operator!= (const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<= (const T& x, const T& y) { return (!(y < x)); } + friend bool operator>= (const T& x, const T& y) { return (!(x < y)); } + }; + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) 
T(init); + } + + template + void construct_array(T* p, uint32 n) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T; + } + + template + void construct_array(T* p, uint32 n, const U& init) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { + p; + p->~T(); + } + + template inline void destruct_array(T* p, uint32 n) + { + T* q = p + n; + for ( ; p != q; ++p) + p->~T(); + } + + } // namespace helpers + +} // namespace crnd + +// File: crnd_traits.h +namespace crnd +{ + template + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { helpers::construct(p); } + static inline void construct(T* p, const T& init) { helpers::construct(p, init); } + static inline void construct_array(T* p, uint32 n) { helpers::construct_array(p, n); } + static inline void destruct(T* p) { helpers::destruct(p); } + static inline void destruct_array(T* p, uint32 n) { helpers::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, uint32 n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { p; } + static inline void destruct_array(T** p, uint32 n) { p, n; } + }; + +#define CRND_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, uint32 n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { p; } \ + static inline void destruct_array(X* p, uint32 n) { p, n; } }; + + CRND_DEFINE_BUILT_IN_TYPE(bool) + CRND_DEFINE_BUILT_IN_TYPE(char) + CRND_DEFINE_BUILT_IN_TYPE(unsigned char) + 
CRND_DEFINE_BUILT_IN_TYPE(short) + CRND_DEFINE_BUILT_IN_TYPE(unsigned short) + CRND_DEFINE_BUILT_IN_TYPE(int) + CRND_DEFINE_BUILT_IN_TYPE(unsigned int) + CRND_DEFINE_BUILT_IN_TYPE(long) + CRND_DEFINE_BUILT_IN_TYPE(unsigned long) + CRND_DEFINE_BUILT_IN_TYPE(int64) + CRND_DEFINE_BUILT_IN_TYPE(uint64) + CRND_DEFINE_BUILT_IN_TYPE(float) + CRND_DEFINE_BUILT_IN_TYPE(double) + CRND_DEFINE_BUILT_IN_TYPE(long double) + +#undef CRND_DEFINE_BUILT_IN_TYPE + + // See: http://erdani.org/publications/cuj-2004-06.pdf + + template + struct bitwise_movable { enum { cFlag = false }; }; + + // Defines type Q as bitwise movable. +#define CRND_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + // From yasli_traits.h: + // Credit goes to Boost; + // also found in the C++ Templates book by Vandevoorde and Josuttis + + typedef char (&yes_t)[1]; + typedef char (&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + +#define CRND_IS_POD(T) __is_pod(T) + +} // namespace crnd + +// File: crnd_mem.h +namespace crnd +{ + void* crnd_malloc(size_t size, size_t* pActual_size = NULL); + void* crnd_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); + void crnd_free(void* p); + size_t crnd_msize(void* p); + + template + inline T* crnd_new() + { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; + + return helpers::construct(p); + } + + template + inline T* crnd_new(const T& init) + { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; + + return helpers::construct(p, init); + } + + template + inline T* crnd_new_array(uint32 num) + { + if (!num) num = 1; + + uint8* q = static_cast(crnd_malloc(CRND_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); 
+ if (!q) + return NULL; + + T* p = reinterpret_cast(q + CRND_MIN_ALLOC_ALIGNMENT); + + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; + + helpers::construct_array(p, num); + return p; + } + + template + inline void crnd_delete(T* p) + { + if (p) + { + helpers::destruct(p); + crnd_free(p); + } + } + + template + inline void crnd_delete_array(T* p) + { + if (p) + { + const uint32 num = reinterpret_cast(p)[-1]; + const uint32 num_check = reinterpret_cast(p)[-2]; + num_check; + CRND_ASSERT(num && (num == ~num_check)); + + helpers::destruct_array(p, num); + + crnd_free(reinterpret_cast(p) - CRND_MIN_ALLOC_ALIGNMENT); + } + } + +} // namespace crnd + +// File: crnd_math.h +namespace crnd +{ + namespace math + { + const float cNearlyInfinite = 1.0e+37f; + + const float cDegToRad = 0.01745329252f; + const float cRadToDeg = 57.29577951f; + + extern uint32 g_bitmasks[32]; + + // Yes I know these should probably be pass by ref, not val: + // http://www.stepanovpapers.com/notes.pdf + // Just don't use them on non-simple (non built-in) types! + template inline T minimum(T a, T b) + { + return (a < b) ? a : b; + } + + template inline T minimum(T a, T b, T c) + { + return minimum(minimum(a, b), c); + } + + template inline T maximum(T a, T b) + { + return (a > b) ? a : b; + } + + template inline T maximum(T a, T b, T c) + { + return maximum(maximum(a, b), c); + } + + template inline T clamp(T value, T low, T high) + { + return (value < low) ? low : ((value > high) ? high : value); + } + + template inline T square(T value) + { + return value * value; + } + + inline bool is_power_of_2(uint32 x) + { + return x && ((x & (x - 1U)) == 0U); + } + + // From "Hackers Delight" + inline int next_pow2(uint32 val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + // Returns the total number of bits needed to encode v. 
+ inline uint32 total_bits(uint32 v) + { + uint32 l = 0; + while (v > 0U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint floor_log2i(uint v) + { + uint l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint ceil_log2i(uint v) + { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + l++; + return l; + } + } +} + +// File: crnd_utils.h +namespace crnd +{ + namespace utils + { + template inline void zero_object(T& obj) + { + memset(&obj, 0, sizeof(obj)); + } + + template inline void zero_this(T* pObj) + { + memset(pObj, 0, sizeof(*pObj)); + } + + template + inline void swap(T& left, T& right) + { + T temp(left); + left = right; + right = temp; + } + + inline void invert_buf(void* pBuf, uint32 size) + { + uint8* p = static_cast(pBuf); + + const uint32 half_size = size >> 1; + for (uint32 i = 0; i < half_size; i++) + swap(p[i], p[size - 1U - i]); + } + + static inline uint16 swap16(uint16 x) { return static_cast((x << 8) | (x >> 8)); } + static inline uint32 swap32(uint32 x) { return ((x << 24) | ((x << 8) & 0x00FF0000) | (( x >> 8) & 0x0000FF00) | (x >> 24)); } + + uint32 compute_max_mips(uint32 width, uint32 height); + + } // namespace utils + +} // namespace crnd + +// File: crnd_vector.h +namespace crnd +{ + struct elemental_vector + { + void* m_p; + uint32 m_size; + uint32 m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint32 num); + + bool increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pRelocate); + }; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4127) // warning C4127: conditional expression is constant +#endif + + template + class vector : public helpers::rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + 
m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + } + + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + *this = other; + } + + inline vector(uint32 size) : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + resize(size); + } + + inline ~vector() + { + clear(); + } + + // I don't like this. Not at all. But exceptions, or just failing suck worse. + inline bool get_alloc_failed() const { return m_alloc_failed; } + inline void clear_alloc_failed() { m_alloc_failed = false; } + + inline bool assign(const vector& other) + { + if (this == &other) + return true; + + if (m_capacity == other.m_size) + resize(0); + else + { + clear(); + + if (!increase_capacity(other.m_size, false)) + return false; + } + + if (scalar_type::cFlag) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32 i = other.m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return true; + } + + inline vector& operator= (const vector& other) + { + assign(other); + return *this; + } + + inline const T* begin() const { return m_p; } + T* begin() { return m_p; } + + inline const T* end() const { return m_p + m_size; } + T* end() { return m_p + m_size; } + + inline bool empty() const { return !m_size; } + inline uint32 size() const { return m_size; } + inline uint32 capacity() const { return m_capacity; } + + inline const T& operator[] (uint32 i) const { CRND_ASSERT(i < m_size); return m_p[i]; } + inline T& operator[] (uint32 i) { CRND_ASSERT(i < m_size); return m_p[i]; } + + inline const T& front() const { CRND_ASSERT(m_size); return m_p[0]; } + inline T& front() { CRND_ASSERT(m_size); return m_p[0]; } + + inline const T& back() const { CRND_ASSERT(m_size); return m_p[m_size - 1]; } + inline T& back() { CRND_ASSERT(m_size); return m_p[m_size - 1]; } + + inline void clear() + { + if (m_p) + { + 
scalar_type::destruct_array(m_p, m_size); + crnd_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + + m_alloc_failed = false; + } + + inline bool reserve(uint32 new_capacity) + { + if (!increase_capacity(new_capacity, false)) + return false; + + return true; + } + + inline bool resize(uint32 new_size) + { + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, new_size == (m_size + 1))) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + inline bool push_back(const T& obj) + { + CRND_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void pop_back() + { + CRND_ASSERT(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline void insert(uint32 index, const T* p, uint32 n) + { + CRND_ASSERT(index <= m_size); + if (!n) + return; + + const uint32 orig_size = m_size; + resize(m_size + n); + + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + const uint32 num_to_move = orig_size - index; + + for (uint32 i = 0; i < num_to_move; i++) + { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + + pSrc = p; + pDst = m_p + index; + + for (uint32 i = 0; i < n; i++) + { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + + inline void erase(uint32 start, uint32 n) + { + CRND_ASSERT((start + n) <= m_size); + + if (!n) + return; + + const uint32 num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + T* pDst_end = pDst + num_to_move; + const T* pSrc = m_p + start + n; + + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + 
scalar_type::destruct_array(pDst_end, n); + + m_size -= n; + } + + inline void erase(uint32 index) + { + erase(index, 1); + } + + inline void erase(T* p) + { + CRND_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(p - m_p); + } + + inline bool operator== (const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint32 i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const uint32 min_size = math::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + void swap(vector& other) + { + utils::swap(m_p, other.m_p); + utils::swap(m_size, other.m_size); + utils::swap(m_capacity, other.m_capacity); + } + + private: + T* m_p; + uint32 m_size; + uint32 m_capacity; + bool m_alloc_failed; + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint32 num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + helpers::construct(pDst, *pSrc); + pSrc->~T(); + pSrc++; + pDst++; + } + } + + inline bool increase_capacity(uint32 min_new_capacity, bool grow_hint) + { + if (!reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + ((scalar_type::cFlag) || (is_vector::cFlag) || (bitwise_movable::cFlag) || CRND_IS_POD(T)) ? 
NULL : object_mover)) + { + m_alloc_failed = true; + return false; + } + return true; + } + }; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + extern void vector_test(); + +} // namespace crnd + +// File: crnd_private.h +namespace crnd +{ + const crn_header* crnd_get_header(crn_header& header, const void* pData, uint32 data_size); + +} // namespace crnd + +// File: checksum.h +namespace crnd +{ + // crc16() intended for small buffers - doesn't use an acceleration table. + const uint16 cInitCRC16 = 0; + uint16 crc16(const void* pBuf, uint32 len, uint16 crc = cInitCRC16); + +} // namespace crnd + +// File: crnd_color.h +namespace crnd +{ + template struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT8_MIN, + cMax = cUINT8_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT16_MIN, + cMax = cINT16_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT16_MIN, + cMax = cUINT16_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT32_MIN, + cMax = cUINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) // warning C4201: nonstandard extension used : nameless struct/union +#pragma warning(disable:4127) // warning C4127: conditional expression is constant +#endif + + template + class color_quad : public 
helpers::rel_ops > + { + static parameter_type clamp(parameter_type v) + { + if (component_traits::cFloat) + return v; + else + { + if (v < component_traits::cMin) + return component_traits::cMin; + else if (v > component_traits::cMax) + return component_traits::cMax; + return v; + } + } + + public: + typedef component_type component_t; + typedef parameter_type parameter_t; + typedef color_quad_component_traits component_traits; + + enum { cNumComps = 4 }; + + union + { + struct + { + component_type r; + component_type g; + component_type b; + component_type a; + }; + + component_type c[cNumComps]; + }; + + inline color_quad() + { + } + + inline color_quad(eClear) : + r(0), g(0), b(0), a(0) + { + } + + inline color_quad(const color_quad& other) : + r(other.r), g(other.g), b(other.b), a(other.a) + { + } + + inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) + { + set(y, alpha); + } + + inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } + + template + inline color_quad(const color_quad& other) : + r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) + { + } + + inline void clear() + { + r = 0; + g = 0; + b = 0; + a = 0; + } + + inline color_quad& operator= (const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + a = other.a; + return *this; + } + + template + inline color_quad& operator=(const color_quad& other) + { + r = clamp(other.r); + g = clamp(other.g); + b = clamp(other.b); + a = clamp(other.a); + return *this; + } + + inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) + { + y = clamp(y); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { 
+ r = static_cast(clamp(red)); + g = static_cast(clamp(green)); + b = static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } + + inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) + { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) + { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } + + static inline parameter_type get_min_comp() { return component_traits::cMin; } + static inline parameter_type get_max_comp() { return component_traits::cMax; } + static inline bool get_comps_are_signed() { return component_traits::cSigned; } + + inline component_type operator[] (uint32 i) const { CRND_ASSERT(i < cNumComps); return c[i]; } + inline component_type& operator[] (uint32 i) { CRND_ASSERT(i < cNumComps); return c[i]; } + + inline color_quad& set_component(uint32 i, parameter_type f) + { + CRND_ASSERT(i < cNumComps); + + c[i] = static_cast(clamp(f)); + + return *this; + } + + inline color_quad& clamp(const color_quad& l, const color_quad& h) + { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l[i], h[i])); + return *this; + } + + inline color_quad& clamp(parameter_type l, parameter_type h) + { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l, h)); + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_type get_luma() const + { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768) >> 16U); + } + + // Returns REC 709 luma. 
+ inline parameter_type get_luma_rec709() const + { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + inline uint32 squared_distance(const color_quad& c, bool alpha = true) const + { + return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? math::square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + inline bool operator== (const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); + } + + inline bool operator< (const color_quad& rhs) const + { + for (uint32 i = 0; i < cNumComps; i++) + { + if (c[i] < rhs.c[i]) + return true; + else if (!(c[i] == rhs.c[i])) + return false; + } + return false; + } + + inline color_quad& operator+= (const color_quad& other) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] + other.c[i])); + return *this; + } + + inline color_quad& operator-= (const color_quad& other) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] - other.c[i])); + return *this; + } + + inline color_quad& operator*= (parameter_type v) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] * v)); + return *this; + } + + inline color_quad& operator/= (parameter_type v) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(c[i] / v); + return *this; + } + + inline color_quad get_swizzled(uint32 x, uint32 y, uint32 z, uint32 w) const + { + CRND_ASSERT((x | y | z | w) < 4); + return color_quad(c[x], c[y], c[z], c[w]); + } + + inline friend color_quad operator+ (const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result += rhs; + return result; + } + + inline friend color_quad operator- (const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result -= rhs; + return result; + } + + inline friend color_quad operator* (const color_quad& lhs, parameter_type v) + { + 
color_quad result(lhs); + result *= v; + return result; + } + + friend inline color_quad operator/ (const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result /= v; + return result; + } + + friend inline color_quad operator* (parameter_type v, const color_quad& rhs) + { + color_quad result(rhs); + result *= v; + return result; + } + + inline uint32 get_min_component_index(bool alpha = true) const + { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] < c[index]) + index = i; + return index; + } + + inline uint32 get_max_component_index(bool alpha = true) const + { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] > c[index]) + index = i; + return index; + } + + inline void get_float4(float* pDst) + { + for (uint32 i = 0; i < 4; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + inline void get_float3(float* pDst) + { + for (uint32 i = 0; i < 3; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + static inline color_quad make_black() + { + return color_quad(0, 0, 0, component_traits::cMax); + } + + static inline color_quad make_white() + { + return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); + } + }; // class color_quad + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + struct scalar_type< color_quad > + { + enum { cFlag = true }; + static inline void construct(color_quad* p) { } + static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } + static inline void construct_array(color_quad* p, uint32 n) { p, n; } + static inline void destruct(color_quad* p) { p; } + static inline void destruct_array(color_quad* p, uint32 n) { p, n; } + }; + + 
typedef color_quad color_quad_u8; + typedef color_quad color_quad_i16; + typedef color_quad color_quad_u16; + typedef color_quad color_quad_i32; + typedef color_quad color_quad_u32; + typedef color_quad color_quad_f; + typedef color_quad color_quad_d; + +} // namespace crnd + +// File: crnd_dxt.h +namespace crnd +{ + /* Block formats understood by the DXT helpers. cDXTInvalid (-1) is the "no format" sentinel; the remaining enumerators take consecutive values starting at cDXT1 == 0, which is why cDXT1/cDXT1A have to stay first. */ enum dxt_format + { + cDXTInvalid = -1, + + // cDXT1/1A must appear first! + cDXT1, + cDXT1A, + + cDXT3, + cDXT5, + cDXT5A, + + cDXN_XY, // inverted relative to standard ATI2, 360's DXN + cDXN_YX // standard ATI2 + }; + + /* Compile-time DXT sizes. cDXTBlockSize evaluates to 4 (1 << cDXTBlockShift). A DXT1 block is 8 bytes and a DXT5-family block is 16 bytes. DXT1 selectors are 2 bits wide (4 values, mask 3); DXT5 alpha selectors are 3 bits wide (8 values, mask 7). */ enum dxt_constants + { + cDXTBlockShift = 2U, + cDXTBlockSize = 1U << cDXTBlockShift, + + cDXT1BytesPerBlock = 8U, + cDXT5NBytesPerBlock = 16U, + + cDXT1SelectorBits = 2U, + cDXT1SelectorValues = 1U << cDXT1SelectorBits, + cDXT1SelectorMask = cDXT1SelectorValues - 1U, + + cDXT5SelectorBits = 3U, + cDXT5SelectorValues = 1U << cDXT5SelectorBits, + cDXT5SelectorMask = cDXT5SelectorValues - 1U + }; + + /* Highest selector value for each selector width (3 == cDXT1SelectorValues - 1, 7 == cDXT5SelectorValues - 1) together with its reciprocal. NOTE(review): presumably used to normalise linear selector values to [0, 1] - confirm at the call sites, which are not visible here. */ const float cDXT1MaxLinearValue = 3.0f; + const float cDXT1InvMaxLinearValue = 1.0f/3.0f; + + const float cDXT5MaxLinearValue = 7.0f; + const float cDXT5InvMaxLinearValue = 1.0f/7.0f; + + // Converts DXT1 raw color selector index to a linear value. + extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; + + // Converts DXT5 raw alpha selector index to a linear value. + extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; + + // Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). + extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; + + // Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). 
+ extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; + + extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; + extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; + + struct dxt1_block + { + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + // These methods assume the in-memory rep is in LE byte order. + inline uint32 get_low_color() const + { + return m_low_color[0] | (m_low_color[1] << 8U); + } + + inline uint32 get_high_color() const + { + return m_high_color[0] | (m_high_color[1] << 8U); + } + + inline void set_low_color(uint16 c) + { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline void set_high_color(uint16 c) + { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline uint32 get_selector(uint32 x, uint32 y) const + { + CRND_ASSERT((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; + } + + inline void set_selector(uint32 x, uint32 y, uint32 val) + { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); + + m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); + m_selectors[y] |= (val << (x * cDXT1SelectorBits)); + } + + static uint16 pack_color(const color_quad_u8& color, bool scaled, uint32 bias = 127U); + static uint16 pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias = 127U); + + static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint32 alpha = 255U); + static void unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled); + + static uint32 get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint32 get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); + // pDst must point to an array at least cDXT1SelectorValues long. 
+ static uint32 get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static color_quad_u8 unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha = 255U); + static uint32 pack_endpoints(uint32 lo, uint32 hi); + }; + + CRND_DEFINE_BITWISE_MOVABLE(dxt1_block); + + struct dxt3_block + { + enum { cNumAlphaBytes = 8 }; + uint8 m_alpha[cNumAlphaBytes]; + + void set_alpha(uint32 x, uint32 y, uint32 value, bool scaled); + uint32 get_alpha(uint32 x, uint32 y, bool scaled) const; + }; + + CRND_DEFINE_BITWISE_MOVABLE(dxt3_block); + + struct dxt5_block + { + uint8 m_endpoints[2]; + + enum { cNumSelectorBytes = 6 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + inline uint32 get_low_alpha() const + { + return m_endpoints[0]; + } + + inline uint32 get_high_alpha() const + { + return m_endpoints[1]; + } + + inline void set_low_alpha(uint32 i) + { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[0] = static_cast(i); + } + + inline void set_high_alpha(uint32 i) + { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[1] = static_cast(i); + } + + uint32 get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } + + uint32 get_selectors_as_word(uint32 index) { CRND_ASSERT(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); } + + inline uint32 get_selector(uint32 x, uint32 y) const + { + CRND_ASSERT((x < 4U) && (y < 4U)); + + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * cDXT5SelectorBits; + + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; + + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + return (v >> bit_ofs) & 7; + } + + inline void set_selector(uint32 x, uint32 y, uint32 val) + { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); + + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * 
cDXT5SelectorBits; + + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; + + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); + + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cNumSelectorBytes - 1)) + m_selectors[byte_index + 1] = static_cast(v >> 8); + } + + // Results written to alpha channel. + static uint32 get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values(color_quad_u8* pDst, uint32 l, uint32 h); + + static uint32 get_block_values6(uint32* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(uint32* pDst, uint32 l, uint32 h); + // pDst must point to an array at least cDXT5SelectorValues long. + static uint32 get_block_values(uint32* pDst, uint32 l, uint32 h); + + static uint32 unpack_endpoint(uint32 packed, uint32 index); + static uint32 pack_endpoints(uint32 lo, uint32 hi); + }; + + CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); + +} // namespace crnd + +// File: crnd_dxt_hc_common.h +namespace crnd +{ + struct chunk_tile_desc + { + // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight. 
+ uint32 m_x_ofs; + uint32 m_y_ofs; + uint32 m_width; + uint32 m_height; + uint32 m_layout_index; + }; + + struct chunk_encoding_desc + { + uint32 m_num_tiles; + chunk_tile_desc m_tiles[4]; + }; + + const uint32 cChunkPixelWidth = 8; + const uint32 cChunkPixelHeight = 8; + const uint32 cChunkBlockWidth = 2; + const uint32 cChunkBlockHeight = 2; + + const uint32 cChunkMaxTiles = 4; + + const uint32 cBlockPixelWidthShift = 2; + const uint32 cBlockPixelHeightShift = 2; + + const uint32 cBlockPixelWidth = 4; + const uint32 cBlockPixelHeight = 4; + + const uint32 cNumChunkEncodings = 8; + extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings]; + + const uint32 cNumChunkTileLayouts = 9; + const uint32 cFirst4x4ChunkTileLayout = 5; + extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; + +} // namespace crnd + +// File: crnd_prefix_coding.h +#ifdef _XBOX +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 +#else +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 0 +#endif + +namespace crnd +{ + namespace prefix_coding + { + const uint32 cMaxExpectedCodeSize = 16; + const uint32 cMaxSupportedSyms = 8192; + const uint32 cMaxTableBits = 11; + + class decoder_tables + { + public: + inline decoder_tables() : + m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + } + + inline decoder_tables(const decoder_tables& other) : + m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + *this = other; + } + + decoder_tables& operator= (const decoder_tables& other) + { + if (this == &other) + return *this; + + clear(); + + memcpy(this, &other, sizeof(*this)); + + if (other.m_lookup) + { + m_lookup = crnd_new_array(m_cur_lookup_size); + if (m_lookup) + memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } + + if (other.m_sorted_symbol_order) + { + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if 
(m_sorted_symbol_order) + memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + + return *this; + } + + inline void clear() + { + if (m_lookup) + { + crnd_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) + { + crnd_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } + + inline ~decoder_tables() + { + if (m_lookup) + crnd_delete_array(m_lookup); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + } + + bool init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits); + + // DO NOT use any complex classes here - it is bitwise copied. + + uint32 m_num_syms; + uint32 m_total_used_syms; + uint32 m_table_bits; + uint32 m_table_shift; + uint32 m_table_max_code; + uint32 m_decode_start_code_size; + + uint8 m_min_code_size; + uint8 m_max_code_size; + + uint32 m_max_codes[cMaxExpectedCodeSize + 1]; + int32 m_val_ptrs[cMaxExpectedCodeSize + 1]; + + uint32 m_cur_lookup_size; + uint32* m_lookup; + + uint32 m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; + + inline uint32 get_unshifted_max_code(uint32 len) const + { + CRND_ASSERT( (len >= 1) && (len <= cMaxExpectedCodeSize) ); + uint32 k = m_max_codes[len - 1]; + if (!k) + return crnd::cUINT32_MAX; + return (k - 1) >> (16 - len); + } + }; + + } // namespace prefix_coding + +} // namespace crnd + +// File: crnd_symbol_codec.h +namespace crnd +{ + class static_huffman_data_model + { + public: + static_huffman_data_model(); + static_huffman_data_model(const static_huffman_data_model& other); + ~static_huffman_data_model(); + + static_huffman_data_model& operator= (const static_huffman_data_model& rhs); + + bool init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit); + void clear(); + + inline bool is_valid() const { return m_pDecode_tables != NULL; } + + inline uint32 
get_total_syms() const { return m_total_syms; } + + inline uint32 get_code_size(uint32 sym) const { return m_code_sizes[sym]; } + + inline const uint8* get_code_sizes() const { return m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } + + public: + uint32 m_total_syms; + crnd::vector m_code_sizes; + prefix_coding::decoder_tables* m_pDecode_tables; + + private: + bool prepare_decoder_tables(); + uint compute_decoder_table_bits() const; + + friend class symbol_codec; + }; + + class symbol_codec + { + public: + symbol_codec(); + + bool start_decoding(const uint8* pBuf, uint32 buf_size); + bool decode_receive_static_data_model(static_huffman_data_model& model); + + uint32 decode_bits(uint32 num_bits); + uint32 decode(const static_huffman_data_model& model); + + uint64 stop_decoding(); + + public: + const uint8* m_pDecode_buf; + const uint8* m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + uint32 m_decode_buf_size; + + typedef uint32 bit_buf_type; + enum { cBitBufSize = 32U }; + bit_buf_type m_bit_buf; + + int m_bit_count; + + private: + void get_bits_init(); + uint32 get_bits(uint32 num_bits); + }; + +} // namespace crnd + +#define CRND_HUFF_DECODE_BEGIN(x) +#define CRND_HUFF_DECODE_END(x) +#define CRND_HUFF_DECODE(codec, model, symbol) symbol = codec.decode(model); + +namespace crnd +{ + /* Reports a failed assertion: formats "<file>(<line>): Assertion failure: "<expr>"" into a 512-byte stack buffer, passes it to crnd_output_debug_string() and puts(), then calls crnd_debug_break() when crnd_is_debugger_present() returns true. pExp is the expression text, pFile/line the location. */ + void crnd_assert(const char* pExp, const char* pFile, unsigned line) + { + char buf[512]; + +#if defined(_WIN32) && defined(_MSC_VER) + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); +#else + /* Bounded write: a long path or expression is truncated to fit buf instead of overrunning the stack buffer. */ + snprintf(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); +#endif + + crnd_output_debug_string(buf); + + puts(buf); + + if (crnd_is_debugger_present()) + crnd_debug_break(); + } + + /* Formats a printf-style message from an already-started va_list and sends it to the debug output. Nothing is formatted unless crnd_is_debugger_present() returns true. */ + void crnd_trace(const char* pFmt, va_list args) + { + if (crnd_is_debugger_present()) + { + char buf[512]; +#if defined(_WIN32) && defined(_MSC_VER) + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + /* Bounded write: output longer than buf is truncated rather than overflowing it. */ + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + + 
crnd_output_debug_string(buf); + } + }; + + void crnd_trace(const char* pFmt, ...) + { + va_list args; + va_start(args, pFmt); + crnd_trace(pFmt, args); + va_end(args); + }; + +} // namespace crnd + +// File: checksum.cpp +// From the public domain stb.h header. +namespace crnd +{ + uint16 crc16(const void* pBuf, uint32 len, uint16 crc) + { + crc = ~crc; + + const uint8* p = reinterpret_cast(pBuf); + while (len) + { + const uint16 q = *p++ ^ (crc >> 8U); + crc <<= 8U; + + uint16 r = (q >> 4U) ^ q; + crc ^= r; + r <<= 5U; + crc ^= r; + r <<= 7U; + crc ^= r; + + len--; + } + + return static_cast(~crc); + } + +} // namespace crnd + + +// File: crnd_vector.cpp +namespace crnd +{ + bool elemental_vector::increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pMover) + { + CRND_ASSERT(m_size <= m_capacity); + CRND_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); + + if (m_capacity >= min_new_capacity) + return true; + + uint32 new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2(new_capacity))) + new_capacity = math::next_pow2(new_capacity); + + CRND_ASSERT(new_capacity && (new_capacity > m_capacity)); + + const uint32 desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) + { + void* new_p = crnd_realloc(m_p, desired_size, &actual_size, true); + if (!new_p) + return false; + m_p = new_p; + } + else + { + void* new_p = crnd_malloc(desired_size, &actual_size); + if (!new_p) + return false; + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + crnd_free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = new_capacity; + + return true; + } + +} // namespace crnd + +// File: crnd_utils.cpp +namespace crnd +{ + namespace utils + { + uint32 compute_max_mips(uint32 width, uint32 height) + { + if ((width | height) == 0) + return 0; + + uint32 num_mips = 1; + + while ((width > 1U) || (height > 1U)) + { 
+ width >>= 1U; + height >>= 1U; + num_mips++; + } + + return num_mips; + } + + } // namespace utils + +} // namespace crnd + +// File: crnd_prefix_coding.cpp +namespace crnd +{ + namespace prefix_coding + { + bool decoder_tables::init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits) + { + uint32 min_codes[cMaxExpectedCodeSize]; + if ((!num_syms) || (table_bits > cMaxTableBits)) + return false; + + m_num_syms = num_syms; + + uint32 num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint32 i = 0; i < num_syms; i++) + { + uint32 c = pCodesizes[i]; + if (c) + num_codes[c]++; + } + + uint32 sorted_positions[cMaxExpectedCodeSize + 1]; + + uint32 cur_code = 0; + + uint32 total_used_syms = 0; + uint32 max_code_size = 0; + uint32 min_code_size = cUINT32_MAX; + for (uint32 i = 1; i <= cMaxExpectedCodeSize; i++) + { + const uint32 n = num_codes[i]; + + if (!n) + m_max_codes[i - 1] = 0;//UINT_MAX; + else + { + min_code_size = math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, i); + + min_codes[i - 1] = cur_code; + + m_max_codes[i - 1] = cur_code + n - 1; + m_max_codes[i - 1] = 1 + ((m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); + + m_val_ptrs[i - 1] = total_used_syms; + + sorted_positions[i] = total_used_syms; + + cur_code += n; + total_used_syms += n; + } + + cur_code <<= 1; + } + + m_total_used_syms = total_used_syms; + + if (total_used_syms > m_cur_sorted_symbol_order_size) + { + m_cur_sorted_symbol_order_size = total_used_syms; + + if (!math::is_power_of_2(total_used_syms)) + m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if (!m_sorted_symbol_order) + return false; + } + + m_min_code_size = static_cast(min_code_size); + m_max_code_size = static_cast(max_code_size); + + for (uint32 i = 0; i < 
num_syms; i++) + { + uint32 c = pCodesizes[i]; + if (c) + { + CRND_ASSERT(num_codes[c]); + + uint32 sorted_pos = sorted_positions[c]++; + + CRND_ASSERT(sorted_pos < total_used_syms); + + m_sorted_symbol_order[sorted_pos] = static_cast(i); + } + } + + if (table_bits <= m_min_code_size) + table_bits = 0; + m_table_bits = table_bits; + + if (table_bits) + { + uint32 table_size = 1 << table_bits; + if (table_size > m_cur_lookup_size) + { + m_cur_lookup_size = table_size; + + if (m_lookup) + crnd_delete_array(m_lookup); + + m_lookup = crnd_new_array(table_size); + if (!m_lookup) + return false; + } + + memset(m_lookup, 0xFF, (uint)sizeof(m_lookup[0]) * (1UL << table_bits)); + + for (uint32 codesize = 1; codesize <= table_bits; codesize++) + { + if (!num_codes[codesize]) + continue; + + const uint32 fillsize = table_bits - codesize; + const uint32 fillnum = 1 << fillsize; + + const uint32 min_code = min_codes[codesize - 1]; + const uint32 max_code = get_unshifted_max_code(codesize); + const uint32 val_ptr = m_val_ptrs[codesize - 1]; + + for (uint32 code = min_code; code <= max_code; code++) + { + const uint32 sym_index = m_sorted_symbol_order[ val_ptr + code - min_code ]; + CRND_ASSERT( pCodesizes[sym_index] == codesize ); + + for (uint32 j = 0; j < fillnum; j++) + { + const uint32 t = j + (code << fillsize); + + CRND_ASSERT(t < (1U << table_bits)); + + CRND_ASSERT(m_lookup[t] == cUINT32_MAX); + + m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } + + for (uint32 i = 0; i < cMaxExpectedCodeSize; i++) + m_val_ptrs[i] -= min_codes[i]; + + m_table_max_code = 0; + m_decode_start_code_size = m_min_code_size; + + if (table_bits) + { + uint32 i; + for (i = table_bits; i >= 1; i--) + { + if (num_codes[i]) + { + m_table_max_code = m_max_codes[i - 1]; + break; + } + } + if (i >= 1) + { + m_decode_start_code_size = table_bits + 1; + for (uint32 j = table_bits + 1; j <= max_code_size; j++) + { + if (num_codes[j]) + { + m_decode_start_code_size = j; + break; + } + } + } + } 
+ + // sentinels + m_max_codes[cMaxExpectedCodeSize] = cUINT32_MAX; + m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + + m_table_shift = 32 - m_table_bits; + return true; + } + + } // namespace prefix_codig + +} // namespace crnd + +// File: crnd_platform.cpp +namespace crnd +{ + bool crnd_is_debugger_present() + { +#ifdef CRND_DEVEL + return IsDebuggerPresent() != 0; +#else + return false; +#endif + } + + void crnd_debug_break() + { +#ifdef CRND_DEVEL + DebugBreak(); +#endif + } + + void crnd_output_debug_string(const char* p) + { + p; +#ifdef CRND_DEVEL + OutputDebugStringA(p); +#endif + } + +} // namespace crnd + +// File: crnd_mem.cpp +namespace crnd +{ + const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; + + static void* crnd_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data) + { + pUser_data; + + void* p_new; + + if (!p) + { + p_new = ::malloc(size); + + if (pActual_size) + { +#ifdef _WIN32 + *pActual_size = p_new ? ::_msize(p_new) : 0; +#else + *pActual_size = p_new ? malloc_usable_size(p_new) : 0; +#endif + } + } + else if (!size) + { + ::free(p); + p_new = NULL; + + if (pActual_size) + *pActual_size = 0; + } + else + { + void* p_final_block = p; +#ifdef _WIN32 + p_new = ::_expand(p, size); +#else + p_new = NULL; +#endif + + if (p_new) + p_final_block = p_new; + else if (movable) + { + p_new = ::realloc(p, size); + + if (p_new) + p_final_block = p_new; + } + + if (pActual_size) + { +#ifdef _WIN32 + *pActual_size = ::_msize(p_final_block); +#else + *pActual_size = ::malloc_usable_size(p_final_block); +#endif + } + } + + return p_new; + } + + static size_t crnd_default_msize(void* p, void* pUser_data) + { + pUser_data; +#ifdef _WIN32 + return p ? _msize(p) : 0; +#else + return p ? 
malloc_usable_size(p) : 0; +#endif + } + + static crnd_realloc_func g_pRealloc = crnd_default_realloc; + static crnd_msize_func g_pMSize = crnd_default_msize; + static void* g_pUser_data; + + void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data) + { + if ((!pRealloc) || (!pMSize)) + { + g_pRealloc = crnd_default_realloc; + g_pMSize = crnd_default_msize; + g_pUser_data = NULL; + } + else + { + g_pRealloc = pRealloc; + g_pMSize = pMSize; + g_pUser_data = pUser_data; + } + } + + static inline void crnd_mem_error(const char* p_msg) + { + crnd_assert(p_msg, __FILE__, __LINE__); + } + + void* crnd_malloc(size_t size, size_t* pActual_size) + { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + size = sizeof(uint32); + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); + + if (pActual_size) + *pActual_size = actual_size; + + if ((!p_new) || (actual_size < size)) + { + crnd_mem_error("crnd_malloc: out of memory"); + return NULL; + } + + CRND_ASSERT(((uint32)p_new & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + return p_new; + } + + void* crnd_realloc(void* p, size_t size, size_t* pActual_size, bool movable) + { + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_realloc: bad ptr"); + return NULL; + } + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + + if (pActual_size) + *pActual_size = actual_size; + + CRND_ASSERT(((uint32)p_new & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + return p_new; + } + + void crnd_free(void* p) + { + if (!p) + return; + + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + 
crnd_mem_error("crnd_free: bad ptr"); + return; + } + + (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); + } + + size_t crnd_msize(void* p) + { + if (!p) + return 0; + + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_msize: bad ptr"); + return 0; + } + + return (*g_pMSize)(p, g_pUser_data); + } + +} // namespace crnd + +// File: crnd_math.cpp +namespace crnd +{ + namespace math + { + uint32 g_bitmasks[32] = + { + 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, + 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, + 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, + 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, + 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, + 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, + 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, + 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U + }; + + } // namespace math +} // namespace crnd + +// File: crnd_info.cpp +namespace crnd +{ +#define CRND_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) + + uint32 crnd_crn_format_to_fourcc(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: return CRND_FOURCC('D', 'X', 'T', '1'); + case cCRNFmtDXT3: return CRND_FOURCC('D', 'X', 'T', '3'); + case cCRNFmtDXT5: return CRND_FOURCC('D', 'X', 'T', '5'); + case cCRNFmtDXN_XY: return CRND_FOURCC('A', '2', 'X', 'Y'); + case cCRNFmtDXN_YX: return CRND_FOURCC('A', 'T', 'I', '2'); + case cCRNFmtDXT5A: return CRND_FOURCC('A', 'T', 'I', '1'); + case cCRNFmtDXT5_CCxY: return CRND_FOURCC('C', 'C', 'x', 'Y'); + case cCRNFmtDXT5_xGxR: return CRND_FOURCC('x', 'G', 'x', 'R'); + case cCRNFmtDXT5_xGBR: return CRND_FOURCC('x', 'G', 'B', 'R'); + case cCRNFmtDXT5_AGBR: return CRND_FOURCC('A', 'G', 'B', 'R'); + case cCRNFmtETC1: return CRND_FOURCC('E', 'T', 'C', '1'); + default: break; + } + CRND_ASSERT(false); + return 0; + } + + crn_format crnd_get_fundamental_dxt_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case 
cCRNFmtDXT5_AGBR: + return cCRNFmtDXT5; + default: break; + } + return fmt; + } + + uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: + case cCRNFmtDXT5A: + case cCRNFmtETC1: + return 4; + case cCRNFmtDXT3: + case cCRNFmtDXT5: + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return 8; + default: break; + } + CRND_ASSERT(false); + return 0; + } + + uint32 crnd_get_bytes_per_dxt_block(crn_format fmt) + { + return (crnd_get_crn_format_bits_per_texel(fmt) << 4) >> 3; + } + + // TODO: tmp_header isn't used/This function is a helper to support old headers. + const crn_header* crnd_get_header(crn_header& tmp_header, const void* pData, uint32 data_size) + { + tmp_header; + + if ((!pData) || (data_size < sizeof(crn_header))) + return NULL; + + const crn_header& file_header = *static_cast(pData); + if (file_header.m_sig != crn_header::cCRNSigValue) + return NULL; + + if ((file_header.m_header_size < sizeof(crn_header)) || (data_size < file_header.m_data_size)) + return NULL; + + return &file_header; + } + + bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info) + { + if (pFile_info) + { + if (pFile_info->m_struct_size != sizeof(crn_file_info)) + return false; + + memset(&pFile_info->m_struct_size + 1, 0, sizeof(crn_file_info) - sizeof(pFile_info->m_struct_size)); + } + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + const uint32 header_crc = crc16(&pHeader->m_data_size, (uint32)(pHeader->m_header_size - ((const uint8*)&pHeader->m_data_size - (const uint8*)pHeader))); + if (header_crc != pHeader->m_header_crc16) + return false; + + const uint32 data_crc = crc16((const uint8*)pData + pHeader->m_header_size, pHeader->m_data_size - 
pHeader->m_header_size); + if (data_crc != pHeader->m_data_crc16) + return false; + + if ((pHeader->m_faces != 1) && (pHeader->m_faces != 6)) + return false; + if ((pHeader->m_width < 1) || (pHeader->m_width > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_height < 1) || (pHeader->m_height > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_levels < 1) || (pHeader->m_levels > utils::compute_max_mips(pHeader->m_width, pHeader->m_height))) + return false; + if (((int)pHeader->m_format < cCRNFmtDXT1) || ((int)pHeader->m_format >= cCRNFmtTotal)) + return false; + + if (pFile_info) + { + pFile_info->m_actual_data_size = pHeader->m_data_size; + pFile_info->m_header_size = pHeader->m_header_size; + pFile_info->m_total_palette_size = pHeader->m_color_endpoints.m_size + pHeader->m_color_selectors.m_size + pHeader->m_alpha_endpoints.m_size + pHeader->m_alpha_selectors.m_size; + pFile_info->m_tables_size = pHeader->m_tables_size; + + pFile_info->m_levels = pHeader->m_levels; + + for (uint32 i = 0; i < pHeader->m_levels; i++) + { + uint32 next_ofs = pHeader->m_data_size; + + // assumes the levels are packed together sequentially + if ((i + 1) < pHeader->m_levels) + next_ofs = pHeader->m_level_ofs[i + 1]; + + pFile_info->m_level_compressed_size[i] = next_ofs - pHeader->m_level_ofs[i]; + } + + pFile_info->m_color_endpoint_palette_entries = pHeader->m_color_endpoints.m_num; + pFile_info->m_color_selector_palette_entries = pHeader->m_color_selectors.m_num;; + pFile_info->m_alpha_endpoint_palette_entries = pHeader->m_alpha_endpoints.m_num;; + pFile_info->m_alpha_selector_palette_entries = pHeader->m_alpha_selectors.m_num;; + } + + return true; + } + + bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pInfo) + { + if ((!pData) || (data_size < sizeof(crn_header)) || (!pInfo)) + return false; + + if (pInfo->m_struct_size != sizeof(crn_texture_info)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = 
crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + pInfo->m_width = pHeader->m_width; + pInfo->m_height = pHeader->m_height; + pInfo->m_levels = pHeader->m_levels; + pInfo->m_faces = pHeader->m_faces; + pInfo->m_format = static_cast((uint32)pHeader->m_format); + pInfo->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 8 : 16; + pInfo->m_userdata0 = pHeader->m_userdata0; + pInfo->m_userdata1 = pHeader->m_userdata1; + + return true; + } + + bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info) + { + if ((!pData) || (data_size < cCRNHeaderMinSize) || (!pLevel_info)) + return false; + + if (pLevel_info->m_struct_size != sizeof(crn_level_info)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + if (level_index >= pHeader->m_levels) + return false; + + uint32 width = math::maximum(1U, pHeader->m_width >> level_index); + uint32 height = math::maximum(1U, pHeader->m_height >> level_index); + + pLevel_info->m_width = width; + pLevel_info->m_height = height; + pLevel_info->m_faces = pHeader->m_faces; + pLevel_info->m_blocks_x = (width + 3) >> 2; + pLevel_info->m_blocks_y = (height + 3) >> 2; + pLevel_info->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; + pLevel_info->m_format = static_cast((uint32)pHeader->m_format); + + return true; + } + + const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize) + { + if (pSize) + *pSize = 0; + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return NULL; + + if (level_index >= pHeader->m_levels) + return NULL; + + uint32 cur_level_ofs = pHeader->m_level_ofs[level_index]; + + if (pSize) + { + uint32 next_level_ofs = data_size; + if ((level_index + 1) < (pHeader->m_levels)) + next_level_ofs = pHeader->m_level_ofs[level_index + 1]; + + *pSize = next_level_ofs - cur_level_ofs; + } + + return static_cast(pData) + cur_level_ofs; + } + + uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + uint32 size = pHeader->m_header_size; + + size = math::maximum(size, pHeader->m_color_endpoints.m_ofs + pHeader->m_color_endpoints.m_size); + size = math::maximum(size, pHeader->m_color_selectors.m_ofs + pHeader->m_color_selectors.m_size); + size = math::maximum(size, pHeader->m_alpha_endpoints.m_ofs + pHeader->m_alpha_endpoints.m_size); + size = math::maximum(size, pHeader->m_alpha_selectors.m_ofs + pHeader->m_alpha_selectors.m_size); + size = math::maximum(size, pHeader->m_tables_ofs + pHeader->m_tables_size); + + return size; + } + + bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + if (pHeader->m_flags & 
cCRNHeaderFlagSegmented) + return false; + + const uint actual_base_data_size = crnd_get_segmented_file_size(pData, data_size); + if (base_data_size < actual_base_data_size) + return false; + + memcpy(pBase_data, pData, actual_base_data_size); + + crn_header& new_header = *static_cast(pBase_data); + new_header.m_flags = new_header.m_flags | cCRNHeaderFlagSegmented; + new_header.m_data_size = actual_base_data_size; + + new_header.m_data_crc16 = crc16((const uint8*)pBase_data + new_header.m_header_size, new_header.m_data_size - new_header.m_header_size); + + new_header.m_header_crc16 = crc16(&new_header.m_data_size, new_header.m_header_size - (uint32)((const uint8*)&new_header.m_data_size - (const uint8*)&new_header)); + + CRND_ASSERT(crnd_validate_file(&new_header, actual_base_data_size, NULL)); + + return true; + } + +} // namespace crnd + +// File: symbol_codec.cpp +namespace crnd +{ + static_huffman_data_model::static_huffman_data_model() : +m_total_syms(0), +m_pDecode_tables(NULL) +{ +} + +static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : +m_total_syms(0), +m_pDecode_tables(NULL) +{ + *this = other; +} + +static_huffman_data_model::~static_huffman_data_model() +{ + if (m_pDecode_tables) + crnd_delete(m_pDecode_tables); +} + +static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) +{ + if (this == &rhs) + return *this; + + m_total_syms = rhs.m_total_syms; + m_code_sizes = rhs.m_code_sizes; + if (m_code_sizes.get_alloc_failed()) + { + clear(); + return *this; + } + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + *m_pDecode_tables = *rhs.m_pDecode_tables; + else + m_pDecode_tables = crnd_new(*rhs.m_pDecode_tables); + } + else + { + crnd_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + return *this; +} + +void static_huffman_data_model::clear() +{ + m_total_syms = 0; + m_code_sizes.clear(); + if (m_pDecode_tables) + { + crnd_delete(m_pDecode_tables); + 
m_pDecode_tables = NULL; + } +} + +bool static_huffman_data_model::init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit) +{ + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + + if (!m_code_sizes.resize(total_syms)) + return false; + + uint32 min_code_size = cUINT32_MAX; + uint32 max_code_size = 0; + + for (uint32 i = 0; i < total_syms; i++) + { + uint32 s = pCode_sizes[i]; + m_code_sizes[i] = static_cast(s); + min_code_size = math::minimum(min_code_size, s); + max_code_size = math::maximum(max_code_size, s); + } + + if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) + return false; + + if (max_code_size > code_size_limit) + return false; + + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); + + if (!m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits())) + return false; + + return true; +} + +bool static_huffman_data_model::prepare_decoder_tables() +{ + uint32 total_syms = m_code_sizes.size(); + + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); + + m_total_syms = total_syms; + + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); + + return m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits()); +} + +uint static_huffman_data_model::compute_decoder_table_bits() const +{ +#if CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE + return prefix_coding::cMaxTableBits; +#else + uint32 decoder_table_bits = 0; + if (m_total_syms > 16) + decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + return decoder_table_bits; +#endif +} + +symbol_codec::symbol_codec() : + m_pDecode_buf(NULL), + m_pDecode_buf_next(NULL), + m_pDecode_buf_end(NULL), + m_decode_buf_size(0), + m_bit_buf(0), + m_bit_count(0) +{ +} + +// Code 
length encoding symbols: +// 0-16 - actual code lengths +const uint32 cMaxCodelengthCodes = 21; + +const uint32 cSmallZeroRunCode = 17; +const uint32 cLargeZeroRunCode = 18; +const uint32 cSmallRepeatCode = 19; +const uint32 cLargeRepeatCode = 20; + +const uint32 cMinSmallZeroRunSize = 3; +const uint32 cMaxSmallZeroRunSize = 10; +const uint32 cMinLargeZeroRunSize = 11; +const uint32 cMaxLargeZeroRunSize = 138; + +const uint32 cSmallMinNonZeroRunSize = 3; +const uint32 cSmallMaxNonZeroRunSize = 6; +const uint32 cLargeMinNonZeroRunSize = 7; +const uint32 cLargeMaxNonZeroRunSize = 70; + +const uint32 cSmallZeroRunExtraBits = 3; +const uint32 cLargeZeroRunExtraBits = 7; +const uint32 cSmallNonZeroRunExtraBits = 2; +const uint32 cLargeNonZeroRunExtraBits = 6; + +static const uint8 g_most_probable_codelength_codes[] = +{ + cSmallZeroRunCode, cLargeZeroRunCode, + cSmallRepeatCode, cLargeRepeatCode, + + 0, 8, + 7, 9, + 6, 10, + 5, 11, + 4, 12, + 3, 13, + 2, 14, + 1, 15, + 16 +}; +const uint32 cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); + +bool symbol_codec::decode_receive_static_data_model(static_huffman_data_model& model) +{ + const uint32 total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); + + if (!total_used_syms) + { + model.clear(); + return true; + } + + if (!model.m_code_sizes.resize(total_used_syms)) + return false; + + memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); + + const uint32 num_codelength_codes_to_send = decode_bits(5); + if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) + return false; + + static_huffman_data_model dm; + if (!dm.m_code_sizes.resize(cMaxCodelengthCodes)) + return false; + + for (uint32 i = 0; i < num_codelength_codes_to_send; i++) + dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); + + if 
(!dm.prepare_decoder_tables()) + return false; + + uint32 ofs = 0; + while (ofs < total_used_syms) + { + const uint32 num_remaining = total_used_syms - ofs; + + uint32 code = decode(dm); + if (code <= 16) + model.m_code_sizes[ofs++] = static_cast(code); + else if (code == cSmallZeroRunCode) + { + uint32 len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if (code == cLargeZeroRunCode) + { + uint32 len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) + { + uint32 len; + if (code == cSmallRepeatCode) + len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; + else + len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; + + if ((!ofs) || (len > num_remaining)) + return false; + const uint32 prev = model.m_code_sizes[ofs - 1]; + if (!prev) + return false; + const uint32 end = ofs + len; + while (ofs < end) + model.m_code_sizes[ofs++] = static_cast(prev); + } + else + { + CRND_ASSERT(0); + return false; + } + } + + if (ofs != total_used_syms) + return false; + + return model.prepare_decoder_tables(); +} + +bool symbol_codec::start_decoding(const uint8* pBuf, uint32 buf_size) +{ + if (!buf_size) + return false; + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + get_bits_init(); + + return true; +} + +void symbol_codec::get_bits_init() +{ + m_bit_buf = 0; + m_bit_count = 0; +} + +uint32 symbol_codec::decode_bits(uint32 num_bits) +{ + if (!num_bits) + return 0; + + if (num_bits > 16) + { + uint32 a = get_bits(num_bits - 16); + uint32 b = get_bits(16); + + return (a << 16) | b; + } + else + return get_bits(num_bits); +} + +uint32 symbol_codec::get_bits(uint32 num_bits) +{ + CRND_ASSERT(num_bits <= 32U); + + while (m_bit_count < (int)num_bits) 
+ { + bit_buf_type c = 0; + if (m_pDecode_buf_next != m_pDecode_buf_end) + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + CRND_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (c << (cBitBufSize - m_bit_count)); + } + + uint32 result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + + return result; +} + +uint32 symbol_codec::decode(const static_huffman_data_model& model) +{ + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + if (m_bit_count < 24) + { + if (m_bit_count < 16) + { + uint32 c0 = 0, c1 = 0; + const uint8* p = m_pDecode_buf_next; + if (p < m_pDecode_buf_end) c0 = *p++; + if (p < m_pDecode_buf_end) c1 = *p++; + m_pDecode_buf_next = p; + m_bit_count += 16; + uint32 c = (c0 << 8) | c1; + m_bit_buf |= (c << (32 - m_bit_count)); + } + else + { + uint32 c = (m_pDecode_buf_next < m_pDecode_buf_end) ? *m_pDecode_buf_next++ : 0; + m_bit_count += 8; + m_bit_buf |= (c << (32 - m_bit_count)); + } + } + + uint32 k = (m_bit_buf >> 16) + 1; + uint32 sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (32 - pTables->m_table_bits)]; + + CRND_ASSERT(t != cUINT32_MAX); + sym = t & cUINT16_MAX; + len = t >> 16; + + CRND_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for ( ; ; ) + { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + (m_bit_buf >> (32 - len)); + + if (((uint32)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRND_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + return sym; +} + + uint64 symbol_codec::stop_decoding() + { +#if 0 + uint32 i = get_bits(4); + uint32 k = get_bits(3); + i, k; + CRND_ASSERT((i == 15) && (k == 3)); +#endif + + uint64 n = static_cast(m_pDecode_buf_next - m_pDecode_buf); + + return 
n; + } + +} // namespace crnd + +// File: crnd_dxt_hc_common.cpp +namespace crnd +{ + chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = + { + { 1, { { 0, 0, 8, 8, 0 } } }, + + { 2, { { 0, 0, 8, 4, 1 }, { 0, 4, 8, 4, 2 } } }, + { 2, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 8, 4 } } }, + + { 3, { { 0, 0, 8, 4, 1 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } }, + { 3, { { 0, 4, 8, 4, 2 }, { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 } } }, + + { 3, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 4, 6 }, { 4, 4, 4, 4, 8 } } }, + { 3, { { 4, 0, 4, 8, 4 }, { 0, 0, 4, 4, 5 }, { 0, 4, 4, 4, 7 } } }, + + { 4, { { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } } + }; + + chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = + { + // 2x2 + { 0, 0, 8, 8, 0 }, + + // 2x1 + { 0, 0, 8, 4, 1 }, + { 0, 4, 8, 4, 2 }, + + // 1x2 + { 0, 0, 4, 8, 3 }, + { 4, 0, 4, 8, 4 }, + + // 1x1 + { 0, 0, 4, 4, 5 }, + { 4, 0, 4, 4, 6 }, + { 0, 4, 4, 4, 7 }, + { 4, 4, 4, 4, 8 } + }; + +} // namespace crnd + +// File: crnd_dxt.cpp +namespace crnd +{ + const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = { 0U, 3U, 1U, 2U }; + const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = { 0U, 2U, 3U, 1U }; + + const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = { 0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U }; + const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = { 0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U }; + + const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 5, 4, 3, 2, 6, 7 }; + const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 7, 6, 5, 4, 3, 2 }; + + uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint32 bias) + { + uint32 r = color.r; + uint32 g = color.g; + uint32 b = color.b; + + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = math::minimum(r, 31U); + g = math::minimum(g, 63U); + b = math::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 11U)); + } + + uint16 
dxt1_block::pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias) + { + return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); + } + + color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint32 alpha) + { + uint32 b = packed_color & 31U; + uint32 g = (packed_color >> 5U) & 63U; + uint32 r = (packed_color >> 11U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 2U) | (g >> 4U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(r, g, b, alpha); + } + + void dxt1_block::unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled) + { + color_quad_u8 c(unpack_color(packed_color, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + uint32 dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set( (c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); + pDst[3].set(0, 0, 0, 0); + + return 3; + } + + uint32 dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // 12/14/09 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? + // Turns out some GPU's round and some don't. Great. 
+ //pDst[2].set( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); + //pDst[3].set( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); + + pDst[2].set( (c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); + pDst[3].set( (c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); + + return 4; + } + + uint32 dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + if (color0 > color1) + return get_block_colors4(pDst, color0, color1); + else + return get_block_colors3(pDst, color0, color1); + } + + color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha) + { + CRND_ASSERT(index < 2); + return unpack_color( static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha ); + } + + uint32 dxt1_block::pack_endpoints(uint32 lo, uint32 hi) + { + CRND_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); + return lo | (hi << 16U); + } + + void dxt3_block::set_alpha(uint32 x, uint32 y, uint32 value, bool scaled) + { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + if (scaled) + { + CRND_ASSERT(value <= 0xFF); + value = (value * 15U + 128U) / 255U; + } + else + { + CRND_ASSERT(value <= 0xF); + } + + uint32 ofs = (y << 1U) + (x >> 1U); + uint32 c = m_alpha[ofs]; + + c &= ~(0xF << ((x & 1U) << 2U)); + c |= (value << ((x & 1U) << 2U)); + + m_alpha[ofs] = static_cast(c); + } + + uint32 dxt3_block::get_alpha(uint32 x, uint32 y, bool scaled) const + { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + uint32 value = m_alpha[(y << 1U) + (x >> 1U)]; + if (x & 1) + value >>= 4; + value &= 0xF; + + if (scaled) + value = (value << 4U) | value; + + return value; + } + + uint32 dxt5_block::get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 4 + h ) / 5); + pDst[3].a = static_cast((l * 
3 + h * 2) / 5); + pDst[4].a = static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; + } + + uint32 dxt5_block::get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h ) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; + } + + uint32 dxt5_block::get_block_values(color_quad_u8* pDst, uint32 l, uint32 h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + + uint32 dxt5_block::get_block_values6(uint32* pDst, uint32 l, uint32 h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 4 + h ) / 5; + pDst[3] = (l * 3 + h * 2) / 5; + pDst[4] = (l * 2 + h * 3) / 5; + pDst[5] = (l + h * 4) / 5; + pDst[6] = 0; + pDst[7] = 255; + return 6; + } + + uint32 dxt5_block::get_block_values8(uint32* pDst, uint32 l, uint32 h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 6 + h ) / 7; + pDst[3] = (l * 5 + h * 2) / 7; + pDst[4] = (l * 4 + h * 3) / 7; + pDst[5] = (l * 3 + h * 4) / 7; + pDst[6] = (l * 2 + h * 5) / 7; + pDst[7] = (l + h * 6) / 7; + return 8; + } + + uint32 dxt5_block::unpack_endpoint(uint32 packed, uint32 index) + { + CRND_ASSERT(index < 2); + return (packed >> (8 * index)) & 0xFF; + } + + uint32 dxt5_block::pack_endpoints(uint32 lo, uint32 hi) + { + CRND_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); + return lo | (hi << 8U); + } + + uint32 dxt5_block::get_block_values(uint32* pDst, uint32 l, uint32 h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + +} // namespace crnd + +// File: crnd_decode.cpp +#define CRND_CREATE_BYTE_STREAMS 0 + +namespace crnd +{ +#if CRND_CREATE_BYTE_STREAMS + 
static void write_array_to_file(const char* pFilename, const vector& buf) + { + FILE* pFile = fopen(pFilename, "wb"); + fwrite(&buf[0], buf.size(), 1, pFile); + fclose(pFile); + } +#endif + + struct crnd_chunk_tile_desc + { + // These values are in blocks + uint8 m_x_ofs; + uint8 m_y_ofs; + uint8 m_width; + uint8 m_height; + }; + + struct crnd_chunk_encoding_desc + { + uint32 m_num_tiles; + chunk_tile_desc m_tiles[4]; + }; + +#if 0 + static crnd_chunk_encoding_desc g_crnd_chunk_encodings[cNumChunkEncodings] = + { + { 1, { { 0, 0, 2, 2 } } }, + + { 2, { { 0, 0, 2, 1 }, { 0, 1, 2, 1 } } }, + { 2, { { 0, 0, 1, 2 }, { 1, 0, 1, 2 } } }, + + { 3, { { 0, 0, 2, 1 }, { 0, 1, 1, 1 }, { 1, 1, 1, 1 } } }, + { 3, { { 0, 1, 2, 1 }, { 0, 0, 1, 1 }, { 1, 0, 1, 1 } } }, + + { 3, { { 0, 0, 1, 2 }, { 1, 0, 1, 1 }, { 1, 1, 1, 1 } } }, + { 3, { { 1, 0, 1, 2 }, { 0, 0, 1, 1 }, { 0, 1, 1, 1 } } }, + + { 1, { { 0, 0, 1, 1 }, { 1, 0, 1, 1 }, { 0, 1, 1, 1 }, { 1, 1, 1, 1 } } } + }; +#endif + + struct crnd_encoding_tile_indices + { + uint8 m_tiles[4]; + }; + + static crnd_encoding_tile_indices g_crnd_chunk_encoding_tiles[cNumChunkEncodings] = + { + { { 0, 0, 0, 0 } }, + + { { 0, 0, 1, 1 } }, + { { 0, 1, 0, 1 } }, + + { { 0, 0, 1, 2 } }, + { { 1, 2, 0, 0 } }, + + { { 0, 1, 0, 2 } }, + { { 1, 0, 2, 0 } }, + + { { 0, 1, 2, 3 } } + }; + + static uint8 g_crnd_chunk_encoding_num_tiles[cNumChunkEncodings] = { 1, 2, 2, 3, 3, 3, 3, 4 }; + + class crn_unpacker + { + public: + inline crn_unpacker() : + m_magic(cMagicValue), + m_pData(NULL), + m_data_size(0), + m_pHeader(NULL) + { + } + + inline ~crn_unpacker() + { + m_magic = 0; + } + + inline bool is_valid() const { return m_magic == cMagicValue; } + + bool init(const void* pData, uint32 data_size) + { + m_pHeader = crnd_get_header(m_tmp_header, pData, data_size); + if (!m_pHeader) + return false; + + m_pData = static_cast(pData); + m_data_size = data_size; + + if (!init_tables()) + return false; + + if (!decode_palettes()) + return false; + + return 
true;
+        }
+
+        // Unpacks mip level level_index from the file image given to init().
+        //
+        // The level's compressed bytes are located through the header's level
+        // offset table (the last level ends at m_data_size) and handed to the
+        // pointer-based overload. Returns false if level_index is out of range,
+        // if the level's offsets are empty, reversed or extend past the file
+        // image, or if decoding fails.
+        bool unpack_level(
+            void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes,
+            uint32 level_index)
+        {
+            // m_level_ofs is indexed with level_index below; refuse levels the header does not declare.
+            if (level_index >= m_pHeader->m_levels)
+                return false;
+
+            uint32 cur_level_ofs = m_pHeader->m_level_ofs[level_index];
+
+            uint32 next_level_ofs = m_data_size;
+            if ((level_index + 1) < (m_pHeader->m_levels))
+                next_level_ofs = m_pHeader->m_level_ofs[level_index + 1];
+
+            CRND_ASSERT(next_level_ofs > cur_level_ofs);
+
+            // The assert compiles away in release builds. Enforce the same
+            // ordering at run time so the size cannot underflow, and keep the
+            // source range inside the buffer that init() was given.
+            if ((next_level_ofs <= cur_level_ofs) || (next_level_ofs > m_data_size))
+                return false;
+
+            return unpack_level(m_pData + cur_level_ofs, next_level_ofs - cur_level_ofs, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index);
+        }
+
+        bool unpack_level(
+            const void* pSrc, uint32 src_size_in_bytes,
+            void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes,
+            uint32 level_index)
+        {
+            dst_size_in_bytes;
+
+#ifdef CRND_BUILD_DEBUG
+            for (uint32 f = 0; f < m_pHeader->m_faces; f++)
+                if (!pDst[f])
+                    return false;
+#endif
+
+            const uint32 width = math::maximum(m_pHeader->m_width >> level_index, 1U);
+            const uint32 height = math::maximum(m_pHeader->m_height >> level_index, 1U);
+            const uint32 blocks_x = (width + 3U) >> 2U;
+            const uint32 blocks_y = (height + 3U) >> 2U;
+            const uint32 block_size = ((m_pHeader->m_format == cCRNFmtDXT1) || (m_pHeader->m_format == cCRNFmtDXT5A)) ?
8 : 16; + + uint32 minimal_row_pitch = block_size * blocks_x; + if (!row_pitch_in_bytes) + row_pitch_in_bytes = minimal_row_pitch; + else if ((row_pitch_in_bytes < minimal_row_pitch) || (row_pitch_in_bytes & 3)) + return false; + if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) + return false; + + const uint32 chunks_x = (blocks_x + 1) >> 1; + const uint32 chunks_y = (blocks_y + 1) >> 1; + +#if CRND_CREATE_BYTE_STREAMS + crnd_trace("Index stream: %u bytes\n", src_size_in_bytes); +#endif + + if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) + return false; + + bool status = false; + switch (m_pHeader->m_format) + { + case cCRNFmtDXT1: + status = unpack_dxt1((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + case cCRNFmtDXT5: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtDXT5_xGxR: + status = unpack_dxt5((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + case cCRNFmtDXT5A: + status = unpack_dxt5a((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + status = unpack_dxn((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + default: + return false; + } + if (!status) + return false; + + m_codec.stop_decoding(); + return true; + } + + inline const void* get_data() const { return m_pData; } + inline uint32 get_data_size() const { return m_data_size; } + + private: + enum { cMagicValue = 0x1EF9CABD }; + uint32 m_magic; + + const uint8* m_pData; + uint32 m_data_size; + crn_header m_tmp_header; + const crn_header* m_pHeader; + + symbol_codec m_codec; + + static_huffman_data_model m_chunk_encoding_dm; + static_huffman_data_model m_endpoint_delta_dm[2]; + static_huffman_data_model m_selector_delta_dm[2]; + + crnd::vector m_color_endpoints; + crnd::vector 
m_color_selectors; + + crnd::vector m_alpha_endpoints; + crnd::vector m_alpha_selectors; + + bool init_tables() + { + if (!m_codec.start_decoding(m_pData + m_pHeader->m_tables_ofs, m_pHeader->m_tables_size)) + return false; + + if (!m_codec.decode_receive_static_data_model(m_chunk_encoding_dm)) + return false; + + if ((!m_pHeader->m_color_endpoints.m_num) && (!m_pHeader->m_alpha_endpoints.m_num)) + return false; + + if (m_pHeader->m_color_endpoints.m_num) + { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[0])) return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[0])) return false; + } + + if (m_pHeader->m_alpha_endpoints.m_num) + { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[1])) return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[1])) return false; + } + + m_codec.stop_decoding(); + + return true; + } + + bool decode_palettes() + { + if (m_pHeader->m_color_endpoints.m_num) + { + if (!decode_color_endpoints()) return false; + if (!decode_color_selectors()) return false; + } + + if (m_pHeader->m_alpha_endpoints.m_num) + { + if (!decode_alpha_endpoints()) return false; + if (!decode_alpha_selectors()) return false; + } + + return true; + } + + bool decode_color_endpoints() + { + const uint32 num_color_endpoints = m_pHeader->m_color_endpoints.m_num; + + if (!m_color_endpoints.resize(num_color_endpoints)) + return false; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_endpoints.m_ofs, m_pHeader->m_color_endpoints.m_size)) + return false; + + static_huffman_data_model dm[2]; + for (uint32 i = 0; i < 2; i++) + if (!m_codec.decode_receive_static_data_model(dm[i])) + return false; + + uint32 a = 0, b = 0, c = 0; + uint32 d = 0, e = 0, f = 0; + + uint32* CRND_RESTRICT pDst = &m_color_endpoints[0]; + + CRND_HUFF_DECODE_BEGIN(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + vector byte_stream; +#endif + + for (uint32 i = 0; i < num_color_endpoints; i++) + { + uint32 
da, db, dc, dd, de, df; + CRND_HUFF_DECODE(m_codec, dm[0], da); a = (a + da) & 31; + CRND_HUFF_DECODE(m_codec, dm[1], db); b = (b + db) & 63; + CRND_HUFF_DECODE(m_codec, dm[0], dc); c = (c + dc) & 31; + + CRND_HUFF_DECODE(m_codec, dm[0], dd); d = (d + dd) & 31; + CRND_HUFF_DECODE(m_codec, dm[1], de); e = (e + de) & 63; + CRND_HUFF_DECODE(m_codec, dm[0], df); f = (f + df) & 31; + +#if CRND_CREATE_BYTE_STREAMS + byte_stream.push_back(da); + byte_stream.push_back(db); + byte_stream.push_back(dc); + byte_stream.push_back(dd); + byte_stream.push_back(de); + byte_stream.push_back(df); +#endif + + if (c_crnd_little_endian_platform) + *pDst++ = c | (b << 5U) | (a << 11U) | (f << 16U) | (e << 21U) | (d << 27U); + else + *pDst++ = f | (e << 5U) | (d << 11U) | (c << 16U) | (b << 21U) | (a << 27U); + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + +#if CRND_CREATE_BYTE_STREAMS + write_array_to_file(L"colorendpoints.bin", byte_stream); + crnd_trace("color endpoints: %u\n", (uint)m_pHeader->m_color_endpoints.m_size); +#endif + + return true; + } + + bool decode_color_selectors() + { + const uint32 cMaxSelectorValue = 3U; + const uint32 cMaxUniqueSelectorDeltas = cMaxSelectorValue * 2U + 1U; + + const uint32 num_color_selectors = m_pHeader->m_color_selectors.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + int32 delta0[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 delta1[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 l = -(int32)cMaxSelectorValue, m = -(int32)cMaxSelectorValue; + for (uint32 i = 0; i < (cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas); i++) + { + delta0[i] = l; + delta1[i] = m; + + if (++l > (int32)cMaxSelectorValue) + { + l = -(int32)cMaxSelectorValue; + m++; + } + } + + uint32 cur[16]; + utils::zero_object(cur); + 
+ if (!m_color_selectors.resize(num_color_selectors)) + return false; + + uint32* CRND_RESTRICT pDst = &m_color_selectors[0]; + + const uint8* pFrom_linear = g_dxt1_from_linear; + + CRND_HUFF_DECODE_BEGIN(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + vector byte_stream; +#endif + + for (uint32 i = 0; i < num_color_selectors; i++) + { + for (uint32 j = 0; j < 8; j++) + { + int32 sym; + CRND_HUFF_DECODE(m_codec, dm, sym); + +#if CRND_CREATE_BYTE_STREAMS + byte_stream.push_back(sym); +#endif + + cur[j*2+0] = (delta0[sym] + cur[j*2+0]) & 3; + cur[j*2+1] = (delta1[sym] + cur[j*2+1]) & 3; + } + + if (c_crnd_little_endian_platform) + { + *pDst++ = + (pFrom_linear[cur[0 ]] ) | (pFrom_linear[cur[1 ]] << 2) | (pFrom_linear[cur[2 ]] << 4) | (pFrom_linear[cur[3 ]] << 6) | + (pFrom_linear[cur[4 ]] << 8) | (pFrom_linear[cur[5 ]] << 10) | (pFrom_linear[cur[6 ]] << 12) | (pFrom_linear[cur[7 ]] << 14) | + (pFrom_linear[cur[8 ]] << 16) | (pFrom_linear[cur[9 ]] << 18) | (pFrom_linear[cur[10]] << 20) | (pFrom_linear[cur[11]] << 22) | + (pFrom_linear[cur[12]] << 24) | (pFrom_linear[cur[13]] << 26) | (pFrom_linear[cur[14]] << 28) | (pFrom_linear[cur[15]] << 30); + } + else + { + *pDst++ = + (pFrom_linear[cur[8 ]] ) | (pFrom_linear[cur[9 ]] << 2) | (pFrom_linear[cur[10]] << 4) | (pFrom_linear[cur[11]] << 6) | + (pFrom_linear[cur[12]] << 8) | (pFrom_linear[cur[13]] << 10) | (pFrom_linear[cur[14]] << 12) | (pFrom_linear[cur[15]] << 14) | + (pFrom_linear[cur[0 ]] << 16) | (pFrom_linear[cur[1 ]] << 18) | (pFrom_linear[cur[2 ]] << 20) | (pFrom_linear[cur[3 ]] << 22) | + (pFrom_linear[cur[4 ]] << 24) | (pFrom_linear[cur[5 ]] << 26) | (pFrom_linear[cur[6 ]] << 28) | (pFrom_linear[cur[7 ]] << 30); + } + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + +#if CRND_CREATE_BYTE_STREAMS + write_array_to_file(L"colorselectors.bin", byte_stream); + crnd_trace("color selectors: %u\n", (uint)m_pHeader->m_color_selectors.m_size); +#endif + + return true; + } + + bool 
decode_alpha_endpoints() + { + const uint32 num_alpha_endpoints = m_pHeader->m_alpha_endpoints.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_endpoints.m_ofs, m_pHeader->m_alpha_endpoints.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + if (!m_alpha_endpoints.resize(num_alpha_endpoints)) + return false; + + uint16* CRND_RESTRICT pDst = &m_alpha_endpoints[0]; + uint32 a = 0, b = 0; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 i = 0; i < num_alpha_endpoints; i++) + { + uint sa; CRND_HUFF_DECODE(m_codec, dm, sa); + uint sb; CRND_HUFF_DECODE(m_codec, dm, sb); + + a = (sa + a) & 255; + b = (sb + b) & 255; + + *pDst++ = (uint16)(a | (b << 8)); + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + + return true; + } + + bool decode_alpha_selectors() + { + const uint32 cMaxSelectorValue = 7U; + const uint32 cMaxUniqueSelectorDeltas = cMaxSelectorValue * 2U + 1U; + + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + int32 delta0[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 delta1[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 l = -(int32)cMaxSelectorValue, m = -(int32)cMaxSelectorValue; + for (uint32 i = 0; i < (cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas); i++) + { + delta0[i] = l; + delta1[i] = m; + + if (++l > (int32)cMaxSelectorValue) + { + l = -(int32)cMaxSelectorValue; + m++; + } + } + + uint32 cur[16]; + utils::zero_object(cur); + + if (!m_alpha_selectors.resize(num_alpha_selectors * 3)) + return false; + + uint16* CRND_RESTRICT pDst = &m_alpha_selectors[0]; + + const uint8* pFrom_linear = g_dxt5_from_linear; + + CRND_HUFF_DECODE_BEGIN(m_codec); + 
+ for (uint32 i = 0; i < num_alpha_selectors; i++) + { + for (uint32 j = 0; j < 8; j++) + { + int32 sym; + CRND_HUFF_DECODE(m_codec, dm, sym); + + cur[j*2+0] = (delta0[sym] + cur[j*2+0]) & 7; + cur[j*2+1] = (delta1[sym] + cur[j*2+1]) & 7; + //cur[j*2+0] = ((sym%15)-7 + cur[j*2+0]) & 7; + //cur[j*2+1] = ((sym/15)-7 + cur[j*2+1]) & 7; + } + +#if 0 + dxt5_block blk; + for (uint32 y = 0; y < 4; y++) + for (uint32 x = 0; x < 4; x++) + blk.set_selector(x, y, pFrom_linear[cur[x+y*4]]); + + *pDst++ = blk.get_selectors_as_word(0); + *pDst++ = blk.get_selectors_as_word(1); + *pDst++ = blk.get_selectors_as_word(2); +#else + *pDst++ = (uint16)((pFrom_linear[cur[0 ]] ) | (pFrom_linear[cur[1 ]] << 3) | (pFrom_linear[cur[2 ]] << 6) | (pFrom_linear[cur[3 ]] << 9) | + (pFrom_linear[cur[4 ]] << 12) | (pFrom_linear[cur[5 ]] << 15)); + + *pDst++ = (uint16)((pFrom_linear[cur[5 ]] >> 1) | (pFrom_linear[cur[6 ]] << 2) | (pFrom_linear[cur[7 ]] << 5) | + (pFrom_linear[cur[8 ]] << 8) | (pFrom_linear[cur[9 ]] << 11) | (pFrom_linear[cur[10]] << 14)); + + *pDst++ = (uint16)((pFrom_linear[cur[10]] >> 2) | (pFrom_linear[cur[11]] << 1) | (pFrom_linear[cur[12]] << 4) | + (pFrom_linear[cur[13]] << 7) | (pFrom_linear[cur[14]] << 10) | (pFrom_linear[cur[15]] << 13)); +#endif + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + + return true; + } + + static inline uint32 tiled_offset_2d_outer(uint32 y, uint32 AlignedWidth, uint32 LogBpp) + { + uint32 Macro = ((y >> 5) * (AlignedWidth >> 5)) << (LogBpp + 7); + uint32 Micro = ((y & 6) << 2) << LogBpp; + + return Macro + + ((Micro & ~15) << 1) + + (Micro & 15) + + ((y & 8) << (3 + LogBpp)) + ((y & 1) << 4); + } + + static inline uint32 tiled_offset_2d_inner(uint32 x, uint32 y, uint32 LogBpp, uint32 BaseOffset) + { + uint32 Macro = (x >> 5) << (LogBpp + 7); + uint32 Micro = (x & 7) << LogBpp; + uint32 Offset = BaseOffset + Macro + ((Micro & ~15) << 1) + (Micro & 15); + + return ((Offset & ~511) << 3) + ((Offset & 448) << 2) + (Offset & 
63) + + ((y & 16) << 7) + + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); + } + + static inline void limit(uint& x, uint n) + { + int v = x - n; + int msk = (v >> 31); + x = (x & msk) | (v & ~msk); + } + + bool unpack_dxt1(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_color_selectors = m_color_selectors.size(); + + uint32 prev_color_endpoint_index = 0; + uint32 prev_color_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + const uint32 row_pitch_in_dwords = row_pitch_in_bytes >> 2U; + + const int32 cBytesPerBlock = 8; + + CRND_HUFF_DECODE_BEGIN(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + vector tile_encoding_stream; + vector endpoint_indices_stream; + vector selector_indices_stream; +#endif + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 color_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); +#if CRND_CREATE_BYTE_STREAMS + tile_encoding_stream.push_back(chunk_encoding_bits & 7); + tile_encoding_stream.push_back((chunk_encoding_bits >> 3) & 7); + tile_encoding_stream.push_back((chunk_encoding_bits >> 6) & 7); +#endif + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + 
chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; + CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[0], delta); +#if CRND_CREATE_BYTE_STREAMS + endpoint_indices_stream.push_back(delta); +#endif + prev_color_endpoint_index += delta; + limit(prev_color_endpoint_index, num_color_endpoints); + color_endpoints[i] = m_color_endpoints[prev_color_endpoint_index]; + } + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + if ((!skip_bottom_row) && (!skip_right_col)) + { + //CRND_ASSERT( ((uint8*)&pD[4 + row_pitch_in_dwords] - pDst) <= dst_size_in_bytes ); + + pD[0] = color_endpoints[pTile_indices[0]]; + CRND_WRITE_BARRIER + uint32 delta0; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta0); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta0); +#endif + prev_color_selector_index += delta0; + limit(prev_color_selector_index, num_color_selectors); + pD[1] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + + pD[2] = color_endpoints[pTile_indices[1]]; + CRND_WRITE_BARRIER + uint32 delta1; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta1); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta1); +#endif + prev_color_selector_index += delta1; + limit(prev_color_selector_index, num_color_selectors); + pD[3] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + + pD[0 + row_pitch_in_dwords] = color_endpoints[pTile_indices[2]]; + CRND_WRITE_BARRIER + uint32 delta2; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta2); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta2); +#endif + prev_color_selector_index += delta2; + limit(prev_color_selector_index, num_color_selectors); + 
pD[1 + row_pitch_in_dwords] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + + pD[2 + row_pitch_in_dwords] = color_endpoints[pTile_indices[3]]; + CRND_WRITE_BARRIER + uint32 delta3; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta3); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta3); +#endif + prev_color_selector_index += delta3; + limit(prev_color_selector_index, num_color_selectors); + pD[3 + row_pitch_in_dwords] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + } + else + { + for (uint32 by = 0; by < 2; by++) + { + pD = (uint32*)((uint8*)pBlock + row_pitch_in_bytes * by); + for (uint32 bx = 0; bx < 2; bx++, pD += 2) + { + uint32 delta; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta); +#endif + prev_color_selector_index += delta; + limit(prev_color_selector_index, num_color_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + pD[0] = color_endpoints[pTile_indices[bx + by * 2]]; + CRND_WRITE_BARRIER + pD[1] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + } + } + } + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + write_array_to_file(L"tile_encodings.bin", tile_encoding_stream); + write_array_to_file(L"endpoint_indices.bin", endpoint_indices_stream); + write_array_to_file(L"selector_indices.bin", selector_indices_stream); +#endif + + return true; + } + + bool unpack_dxt5(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_color_selectors = m_color_selectors.size(); + const uint32 num_alpha_endpoints = 
m_alpha_endpoints.size(); + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + uint32 prev_color_endpoint_index = 0; + uint32 prev_color_selector_index = 0; + uint32 prev_alpha_endpoint_index = 0; + uint32 prev_alpha_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + //const uint32 row_pitch_in_dwords = row_pitch_in_bytes >> 2U; + + const int32 cBytesPerBlock = 16; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 color_endpoints[4]; + uint32 alpha_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha_endpoint_index += delta; + limit(prev_alpha_endpoint_index, num_alpha_endpoints); + alpha_endpoints[i] = m_alpha_endpoints[prev_alpha_endpoint_index]; + } + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, 
m_endpoint_delta_dm[0], delta); + prev_color_endpoint_index += delta; + limit(prev_color_endpoint_index, num_color_endpoints); + color_endpoints[i] = m_color_endpoints[prev_color_endpoint_index]; + } + + pD = (uint32*)pBlock; + for (uint32 by = 0; by < 2; by++) + { + for (uint32 bx = 0; bx < 2; bx++, pD += 4) + { + uint32 delta0; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta0); + prev_alpha_selector_index += delta0; + limit(prev_alpha_selector_index, num_alpha_selectors); + + uint32 delta1; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta1); + prev_color_selector_index += delta1; + limit(prev_color_selector_index, num_color_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + const uint32 tile_index = pTile_indices[bx + by * 2]; + const uint16* pAlpha_selectors = &m_alpha_selectors[prev_alpha_selector_index * 3]; + +#ifdef CRND_BIG_ENDIAN_PLATFORM + pD[0] = (alpha_endpoints[tile_index] << 16) | pAlpha_selectors[0]; + CRND_WRITE_BARRIER + pD[1] = (pAlpha_selectors[1] << 16) | pAlpha_selectors[2]; + CRND_WRITE_BARRIER + pD[2] = color_endpoints[tile_index]; + CRND_WRITE_BARRIER + pD[3] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER +#else + pD[0] = alpha_endpoints[tile_index] | (pAlpha_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[1] = pAlpha_selectors[1] | (pAlpha_selectors[2] << 16); + CRND_WRITE_BARRIER + pD[2] = color_endpoints[tile_index]; + CRND_WRITE_BARRIER + pD[3] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER +#endif + } + } + + pD = (uint32*)((uint8*)pD - cBytesPerBlock * 2 + row_pitch_in_bytes); + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + + return true; + } + + bool unpack_dxn(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + 
const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + uint32 prev_alpha0_endpoint_index = 0; + uint32 prev_alpha0_selector_index = 0; + uint32 prev_alpha1_endpoint_index = 0; + uint32 prev_alpha1_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + //const uint32 row_pitch_in_dwords = row_pitch_in_bytes >> 2U; + + const int32 cBytesPerBlock = 16; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 alpha0_endpoints[4]; + uint32 alpha1_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha0_endpoint_index += delta; + limit(prev_alpha0_endpoint_index, num_alpha_endpoints); + alpha0_endpoints[i] = m_alpha_endpoints[prev_alpha0_endpoint_index]; + } + + for (uint32 i = 0; i < num_tiles; i++) + { + 
uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha1_endpoint_index += delta; + limit(prev_alpha1_endpoint_index, num_alpha_endpoints); + alpha1_endpoints[i] = m_alpha_endpoints[prev_alpha1_endpoint_index]; + } + + pD = (uint32*)pBlock; + for (uint32 by = 0; by < 2; by++) + { + for (uint32 bx = 0; bx < 2; bx++, pD += 4) + { + uint32 delta0; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta0); + prev_alpha0_selector_index += delta0; + limit(prev_alpha0_selector_index, num_alpha_selectors); + + uint32 delta1; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta1); + prev_alpha1_selector_index += delta1; + limit(prev_alpha1_selector_index, num_alpha_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + const uint32 tile_index = pTile_indices[bx + by * 2]; + const uint16* pAlpha0_selectors = &m_alpha_selectors[prev_alpha0_selector_index * 3]; + const uint16* pAlpha1_selectors = &m_alpha_selectors[prev_alpha1_selector_index * 3]; + +#ifdef CRND_BIG_ENDIAN_PLATFORM + pD[0] = (alpha0_endpoints[tile_index] << 16) | pAlpha0_selectors[0]; + CRND_WRITE_BARRIER + pD[1] = (pAlpha0_selectors[1] << 16) | pAlpha0_selectors[2]; + CRND_WRITE_BARRIER + pD[2] = (alpha1_endpoints[tile_index] << 16) | pAlpha1_selectors[0]; + CRND_WRITE_BARRIER + pD[3] = (pAlpha1_selectors[1] << 16) | pAlpha1_selectors[2]; + CRND_WRITE_BARRIER +#else + pD[0] = alpha0_endpoints[tile_index] | (pAlpha0_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + CRND_WRITE_BARRIER + pD[2] = alpha1_endpoints[tile_index] | (pAlpha1_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[3] = pAlpha1_selectors[1] | (pAlpha1_selectors[2] << 16); + CRND_WRITE_BARRIER +#endif + } + } + + pD = (uint32*)((uint8*)pD - cBytesPerBlock * 2 + row_pitch_in_bytes); + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + + return true; + } 
+ + bool unpack_dxt5a(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + uint32 prev_alpha0_endpoint_index = 0; + uint32 prev_alpha0_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + const int32 cBytesPerBlock = 8; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 alpha0_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha0_endpoint_index += delta; + limit(prev_alpha0_endpoint_index, num_alpha_endpoints); + alpha0_endpoints[i] = m_alpha_endpoints[prev_alpha0_endpoint_index]; + } + + pD 
= (uint32*)pBlock; + for (uint32 by = 0; by < 2; by++) + { + for (uint32 bx = 0; bx < 2; bx++, pD += 2) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta); + prev_alpha0_selector_index += delta; + limit(prev_alpha0_selector_index, num_alpha_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + const uint32 tile_index = pTile_indices[bx + by * 2]; + const uint16* pAlpha0_selectors = &m_alpha_selectors[prev_alpha0_selector_index * 3]; + +#if CRND_BIG_ENDIAN_PLATFORM + pD[0] = (alpha0_endpoints[tile_index] << 16) | pAlpha0_selectors[0]; + CRND_WRITE_BARRIER + pD[1] = (pAlpha0_selectors[1] << 16) | pAlpha0_selectors[2]; + CRND_WRITE_BARRIER +#else + pD[0] = alpha0_endpoints[tile_index] | (pAlpha0_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + CRND_WRITE_BARRIER +#endif + } + } + + pD = (uint32*)((uint8*)pD - cBytesPerBlock * 2 + row_pitch_in_bytes); + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + + return true; + } + }; + + crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + crn_unpacker* p = crnd_new(); + if (!p) + return NULL; + + if (!p->init(pData, data_size)) + { + crnd_delete(p); + return NULL; + } + + return p; + } + + bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size) + { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + if (ppData) + *ppData = pUnpacker->get_data(); + + if (pData_size) + *pData_size = pUnpacker->get_data_size(); + + return true; + } + + bool crnd_unpack_level( + crnd_unpack_context pContext, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + if ((!pContext) || (!pDst) || 
(dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + if ((!pContext) || (!pSrc) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pSrc, src_size_in_bytes, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool crnd_unpack_end(crnd_unpack_context pContext) + { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + crnd_delete(pUnpacker); + + return true; + } + +} // namespace crnd + +#endif // CRND_HEADER_FILE_ONLY + + //------------------------------------------------------------------------------ + // + // crunch/crnlib uses a modified ZLIB license. Specifically, it's the same as zlib except that + // public credits for using the library are *required*. + // + // Copyright (c) 2010-2016 Richard Geldreich, Jr. All rights reserved. + // + // This software is provided 'as-is', without any express or implied + // warranty. In no event will the authors be held liable for any damages + // arising from the use of this software. + // + // Permission is granted to anyone to use this software for any purpose, + // including commercial applications, and to alter it and redistribute it + // freely, subject to the following restrictions: + // + // 1. The origin of this software must not be misrepresented; you must not + // claim that you wrote the original software. + // + // 2. 
If you use this software in a product, this acknowledgment in the product + // documentation or credits is required: + // + // "Crunch Library Copyright (c) 2010-2016 Richard Geldreich, Jr." + // + // 3. Altered source versions must be plainly marked as such, and must not be + // misrepresented as being the original software. + // + // 4. This notice may not be removed or altered from any source distribution. + // + //------------------------------------------------------------------------------ + diff --git a/Texture2DDecoder/crunch/crnlib.h b/Texture2DDecoder/crunch/crnlib.h new file mode 100644 index 0000000..40b69b4 --- /dev/null +++ b/Texture2DDecoder/crunch/crnlib.h @@ -0,0 +1,645 @@ +// File: crnlib.h - Advanced DXTn texture compression library. +// Copyright (c) 2010-2016 Richard Geldreich, Jr. All rights reserved. +// See copyright notice and license at the end of this file. +// +// This header file contains the public crnlib declarations for DXTn, +// clustered DXTn, and CRN compression/decompression. +// +// Note: This library does NOT need to be linked into your game executable if +// all you want to do is transcode .CRN files to raw DXTn bits at run-time. +// The crn_decomp.h header file library contains all the code necessary for +// decompression. +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +#ifndef CRNLIB_H +#define CRNLIB_H + +#ifdef _MSC_VER +#pragma warning (disable: 4127) // conditional expression is constant +#endif + +#define CRNLIB_VERSION 104 + +#define CRNLIB_SUPPORT_ATI_COMPRESS 0 +#define CRNLIB_SUPPORT_SQUISH 0 + +typedef unsigned char crn_uint8; +typedef unsigned short crn_uint16; +typedef unsigned int crn_uint32; +typedef signed char crn_int8; +typedef signed short crn_int16; +typedef signed int crn_int32; +typedef unsigned int crn_bool; + +// crnlib can compress to these file types. 
+enum crn_file_type +{ + // .CRN + cCRNFileTypeCRN = 0, + + // .DDS using regular DXT or clustered DXT + cCRNFileTypeDDS, + + cCRNFileTypeForceDWORD = 0xFFFFFFFF +}; + +// Supported compressed pixel formats. +// Basically all the standard DX9 formats, with some swizzled DXT5 formats +// (most of them supported by ATI's Compressonator), along with some ATI/X360 GPU specific formats. +enum crn_format +{ + cCRNFmtInvalid = -1, + + cCRNFmtDXT1 = 0, + + cCRNFmtFirstValid = cCRNFmtDXT1, + + // cCRNFmtDXT3 is not currently supported when writing to CRN - only DDS. + cCRNFmtDXT3, + + cCRNFmtDXT5, + + // Various DXT5 derivatives + cCRNFmtDXT5_CCxY, // Luma-chroma + cCRNFmtDXT5_xGxR, // Swizzled 2-component + cCRNFmtDXT5_xGBR, // Swizzled 3-component + cCRNFmtDXT5_AGBR, // Swizzled 4-component + + // ATI 3DC and X360 DXN + cCRNFmtDXN_XY, + cCRNFmtDXN_YX, + + // DXT5 alpha blocks only + cCRNFmtDXT5A, + + cCRNFmtETC1, + + cCRNFmtTotal, + + cCRNFmtForceDWORD = 0xFFFFFFFF +}; + +// Various library/file format limits. +enum crn_limits +{ + // Max. mipmap level resolution on any axis. + cCRNMaxLevelResolution = 4096, + + cCRNMinPaletteSize = 8, + cCRNMaxPaletteSize = 8192, + + cCRNMaxFaces = 6, + cCRNMaxLevels = 16, + + cCRNMaxHelperThreads = 16, + + cCRNMinQualityLevel = 0, + cCRNMaxQualityLevel = 255 +}; + +// CRN/DDS compression flags. +// See the m_flags member in the crn_comp_params struct, below. +enum crn_comp_flags +{ + // Enables perceptual colorspace distance metrics if set. + // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps! + // Default: Set + cCRNCompFlagPerceptual = 1, + + // Enables (up to) 8x8 macroblock usage if set. If disabled, only 4x4 blocks are allowed. + // Compression ratio will be lower when disabled, but may cut down on blocky artifacts because the process used to determine + // where large macroblocks can be used without artifacts isn't perfect. + // Default: Set. 
+ cCRNCompFlagHierarchical = 2, + + // cCRNCompFlagQuick disables several output file optimizations - intended for things like quicker previews. + // Default: Not set. + cCRNCompFlagQuick = 4, + + // DXT1: OK to use DXT1 alpha blocks for better quality or DXT1A transparency. + // DXT5: OK to use both DXT5 block types. + // Currently only used when writing to .DDS files, as .CRN uses only a subset of the possible DXTn block types. + // Default: Set. + cCRNCompFlagUseBothBlockTypes = 8, + + // OK to use DXT1A transparent indices to encode black (assumes pixel shader ignores fetched alpha). + // Currently only used when writing to .DDS files, .CRN never uses alpha blocks. + // Default: Not set. + cCRNCompFlagUseTransparentIndicesForBlack = 16, + + // Disables endpoint caching, for more deterministic output. + // Currently only used when writing to .DDS files. + // Default: Not set. + cCRNCompFlagDisableEndpointCaching = 32, + + // If enabled, use the cCRNColorEndpointPaletteSize, etc. params to control the CRN palette sizes. Only useful when writing to .CRN files. + // Default: Not set. + cCRNCompFlagManualPaletteSizes = 64, + + // If enabled, DXT1A alpha blocks are used to encode single bit transparency. + // Default: Not set. + cCRNCompFlagDXT1AForTransparency = 128, + + // If enabled, the DXT1 compressor's color distance metric assumes the pixel shader will be converting the fetched RGB results to luma (Y part of YCbCr). + // This increases quality when compressing grayscale images, because the compressor can spread the luma error amoung all three channels (i.e. it can generate blocks + // with some chroma present if doing so will ultimately lead to lower luma error). + // Only enable on grayscale source images. + // Default: Not set. + cCRNCompFlagGrayscaleSampling = 256, + + // If enabled, debug information will be output during compression. + // Default: Not set. 
+    cCRNCompFlagDebugging = 0x80000000,
+
+    cCRNCompFlagForceDWORD = 0xFFFFFFFF
+};
+
+// Controls the DXTn quality vs. speed tradeoff - only used when compressing to .DDS.
+enum crn_dxt_quality
+{
+    cCRNDXTQualitySuperFast,
+    cCRNDXTQualityFast,
+    cCRNDXTQualityNormal,
+    cCRNDXTQualityBetter,
+    cCRNDXTQualityUber,
+
+    cCRNDXTQualityTotal,
+
+    cCRNDXTQualityForceDWORD = 0xFFFFFFFF
+};
+
+// Which DXTn compressor to use when compressing to plain (non-clustered) .DDS.
+enum crn_dxt_compressor_type
+{
+    cCRNDXTCompressorCRN, // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot faster with similar quality to Ericsson's)
+    cCRNDXTCompressorCRNF, // Use crnlib's "fast" DXTc block compressor
+    cCRNDXTCompressorRYG, // Use RYG's DXTc block compressor (low quality, but very fast)
+
+#if CRNLIB_SUPPORT_ATI_COMPRESS
+    cCRNDXTCompressorATI,
+#endif
+
+#if CRNLIB_SUPPORT_SQUISH
+    cCRNDXTCompressorSquish,
+#endif
+
+    cCRNTotalDXTCompressors,
+
+    cCRNDXTCompressorForceDWORD = 0xFFFFFFFF
+};
+
+// Progress callback function.
+// Processing will stop prematurely (and fail) if the callback returns false.
+// phase_index, total_phases - high level progress
+// subphase_index, total_subphases - progress within current phase
+typedef crn_bool (*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr);
+
+// CRN/DDS compression parameters struct.
+struct crn_comp_params
+{
+    inline crn_comp_params() { clear(); }
+
+    // Reset every member of the struct to its default value.
+    inline void clear()
+    {
+        m_size_of_obj = sizeof(*this);
+        m_file_type = cCRNFileTypeCRN;
+        m_faces = 1;
+        m_width = 0;
+        m_height = 0;
+        m_levels = 1;
+        m_format = cCRNFmtDXT1;
+        m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes;
+
+        for (crn_uint32 f = 0; f < cCRNMaxFaces; f++)
+            for (crn_uint32 l = 0; l < cCRNMaxLevels; l++)
+                m_pImages[f][l] = NULL;
+
+        m_target_bitrate = 0.0f;
+        m_quality_level = cCRNMaxQualityLevel;
+        m_dxt1a_alpha_threshold = 128;
+        m_dxt_quality = cCRNDXTQualityUber;
+        m_dxt_compressor_type = cCRNDXTCompressorCRN;
+        m_alpha_component = 3;
+
+        m_crn_adaptive_tile_color_psnr_derating = 2.0f;
+        m_crn_adaptive_tile_alpha_psnr_derating = 2.0f;
+        m_crn_color_endpoint_palette_size = 0;
+        m_crn_color_selector_palette_size = 0;
+        m_crn_alpha_endpoint_palette_size = 0;
+        m_crn_alpha_selector_palette_size = 0;
+
+        m_num_helper_threads = 0;
+        m_userdata0 = 0;
+        m_userdata1 = 0;
+        m_pProgress_func = NULL;
+        m_pProgress_func_data = NULL;
+    }
+
+    inline bool operator== (const crn_comp_params& rhs) const
+    {
+#define CRNLIB_COMP(x) do { if ((x) != (rhs.x)) return false; } while(0)
+        CRNLIB_COMP(m_size_of_obj);
+        CRNLIB_COMP(m_file_type);
+        CRNLIB_COMP(m_faces);
+        CRNLIB_COMP(m_width);
+        CRNLIB_COMP(m_height);
+        CRNLIB_COMP(m_levels);
+        CRNLIB_COMP(m_format);
+        CRNLIB_COMP(m_flags);
+        CRNLIB_COMP(m_target_bitrate);
+        CRNLIB_COMP(m_quality_level);
+        CRNLIB_COMP(m_dxt1a_alpha_threshold);
+        CRNLIB_COMP(m_dxt_quality);
+        CRNLIB_COMP(m_dxt_compressor_type);
+        CRNLIB_COMP(m_alpha_component);
+        CRNLIB_COMP(m_crn_adaptive_tile_color_psnr_derating);
+        CRNLIB_COMP(m_crn_adaptive_tile_alpha_psnr_derating);
+        CRNLIB_COMP(m_crn_color_endpoint_palette_size);
+        CRNLIB_COMP(m_crn_color_selector_palette_size);
+        CRNLIB_COMP(m_crn_alpha_endpoint_palette_size);
+        CRNLIB_COMP(m_crn_alpha_selector_palette_size);
+        CRNLIB_COMP(m_num_helper_threads);
+        CRNLIB_COMP(m_userdata0);
+        CRNLIB_COMP(m_userdata1);
+        CRNLIB_COMP(m_pProgress_func);
+        CRNLIB_COMP(m_pProgress_func_data);
+
+        for (crn_uint32 f = 0; f < cCRNMaxFaces; f++)
+            for (crn_uint32 l = 0; l < cCRNMaxLevels; l++)
+                CRNLIB_COMP(m_pImages[f][l]);
+
+#undef CRNLIB_COMP
+        return true;
+    }
+
+    // Returns true if the input parameters are reasonable (each member is within its documented range).
+    inline bool check() const
+    {
+        if ( (m_file_type > cCRNFileTypeDDS) ||
+            (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) ||
+            (m_dxt1a_alpha_threshold > 255) ||
+            ((m_faces != 1) && (m_faces != 6)) ||
+            ((m_width < 1) || (m_width > cCRNMaxLevelResolution)) ||
+            ((m_height < 1) || (m_height > cCRNMaxLevelResolution)) ||
+            ((m_levels < 1) || (m_levels > cCRNMaxLevels)) ||
+            ((m_format < cCRNFmtDXT1) || (m_format >= cCRNFmtTotal)) ||
+            ((m_crn_color_endpoint_palette_size) && ((m_crn_color_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_color_endpoint_palette_size > cCRNMaxPaletteSize))) ||
+            ((m_crn_color_selector_palette_size) && ((m_crn_color_selector_palette_size < cCRNMinPaletteSize) || (m_crn_color_selector_palette_size > cCRNMaxPaletteSize))) ||
+            ((m_crn_alpha_endpoint_palette_size) && ((m_crn_alpha_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_endpoint_palette_size > cCRNMaxPaletteSize))) ||
+            ((m_crn_alpha_selector_palette_size) && ((m_crn_alpha_selector_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_selector_palette_size > cCRNMaxPaletteSize))) ||
+            (m_alpha_component > 3) ||
+            (m_num_helper_threads > cCRNMaxHelperThreads) ||
+            (m_dxt_quality > cCRNDXTQualityUber) ||
+            (m_dxt_compressor_type >= cCRNTotalDXTCompressors) )
+        {
+            return false;
+        }
+        return true;
+    }
+
+    // Helpers to get/set individual bits of the m_flags member.
+    inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; }
+    inline void set_flag(crn_comp_flags flag, bool val) { m_flags &= ~flag; if (val) m_flags |= flag; }
+
+    crn_uint32 m_size_of_obj;
+
+    crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS.
+
+    crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap)
+    crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK
+    crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK
+    crn_uint32 m_levels; // [1,cCRNMaxLevels] (the range enforced by check())
+
+    crn_format m_format; // Output pixel format.
+
+    crn_uint32 m_flags; // see crn_comp_flags enum
+
+    // Array of pointers to 32bpp input images.
+    const crn_uint32* m_pImages[cCRNMaxFaces][cCRNMaxLevels];
+
+    // Target bitrate - if non-zero, the compressor will use an interpolative search to find the
+    // highest quality level that is <= the target bitrate. If it fails to find a bitrate high enough, it'll
+    // try disabling adaptive block sizes (cCRNCompFlagHierarchical flag) and redo the search. This process can be pretty slow.
+    float m_target_bitrate;
+
+    // Desired quality level.
+    // Currently, CRN and DDS quality levels are not compatible with each other from an image quality standpoint.
+    crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel]
+
+    // DXTn compression parameters.
+    crn_uint32 m_dxt1a_alpha_threshold;
+    crn_dxt_quality m_dxt_quality;
+    crn_dxt_compressor_type m_dxt_compressor_type;
+
+    // Alpha channel's component. Defaults to 3.
+    crn_uint32 m_alpha_component;
+
+    // Various low-level CRN specific parameters.
+    float m_crn_adaptive_tile_color_psnr_derating;
+    float m_crn_adaptive_tile_alpha_psnr_derating;
+
+    crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+    crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+
+    crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+    crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+
+    // Number of helper threads to create during compression. 0=no threading.
+    crn_uint32 m_num_helper_threads;
+
+    // CRN userdata0 and userdata1 members, which are written directly to the header of the output file.
+    crn_uint32 m_userdata0;
+    crn_uint32 m_userdata1;
+
+    // User provided progress callback.
+    crn_progress_callback_func m_pProgress_func;
+    void* m_pProgress_func_data;
+};
+
+// Mipmap generator's mode.
+enum crn_mip_mode
+{
+    cCRNMipModeUseSourceOrGenerateMips, // Use source texture's mipmaps if it has any, otherwise generate new mipmaps
+    cCRNMipModeUseSourceMips, // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps
+    cCRNMipModeGenerateMips, // Always generate new mipmaps
+    cCRNMipModeNoMips, // Output texture has no mipmaps
+
+    cCRNMipModeTotal,
+
+    cCRNModeForceDWORD = 0xFFFFFFFF
+};
+
+const char* crn_get_mip_mode_desc(crn_mip_mode m);
+const char* crn_get_mip_mode_name(crn_mip_mode m);
+
+// Mipmap generator's filter kernel.
+enum crn_mip_filter
+{
+    cCRNMipFilterBox,
+    cCRNMipFilterTent,
+    cCRNMipFilterLanczos4,
+    cCRNMipFilterMitchell,
+    cCRNMipFilterKaiser, // Kaiser=default mipmap filter
+
+    cCRNMipFilterTotal,
+
+    cCRNMipFilterForceDWORD = 0xFFFFFFFF
+};
+
+const char* crn_get_mip_filter_name(crn_mip_filter f);
+
+// Mipmap generator's scale mode.
+enum crn_scale_mode +{ + cCRNSMDisabled, + cCRNSMAbsolute, + cCRNSMRelative, + cCRNSMLowerPow2, + cCRNSMNearestPow2, + cCRNSMNextPow2, + + cCRNSMTotal, + + cCRNSMForceDWORD = 0xFFFFFFFF +}; + +const char* crn_get_scale_mode_desc(crn_scale_mode sm); + +// Mipmap generator parameters. +struct crn_mipmap_params +{ + inline crn_mipmap_params() { clear(); } + + inline void clear() + { + m_size_of_obj = sizeof(*this); + m_mode = cCRNMipModeUseSourceOrGenerateMips; + m_filter = cCRNMipFilterKaiser; + m_gamma_filtering = true; + m_gamma = 2.2f; + // Default "blurriness" factor of .9 actually sharpens the output a little. + m_blurriness = .9f; + m_renormalize = false; + m_tiled = false; + m_max_levels = cCRNMaxLevels; + m_min_mip_size = 1; + + m_scale_mode = cCRNSMDisabled; + m_scale_x = 1.0f; + m_scale_y = 1.0f; + + m_window_left = 0; + m_window_top = 0; + m_window_right = 0; + m_window_bottom = 0; + + m_clamp_scale = false; + m_clamp_width = 0; + m_clamp_height = 0; + } + + inline bool check() const { return true; } + + inline bool operator== (const crn_mipmap_params& rhs) const + { +#define CRNLIB_COMP(x) do { if ((x) != (rhs.x)) return false; } while(0) + CRNLIB_COMP(m_size_of_obj); + CRNLIB_COMP(m_mode); + CRNLIB_COMP(m_filter); + CRNLIB_COMP(m_gamma_filtering); + CRNLIB_COMP(m_gamma); + CRNLIB_COMP(m_blurriness); + CRNLIB_COMP(m_renormalize); + CRNLIB_COMP(m_tiled); + CRNLIB_COMP(m_max_levels); + CRNLIB_COMP(m_min_mip_size); + CRNLIB_COMP(m_scale_mode); + CRNLIB_COMP(m_scale_x); + CRNLIB_COMP(m_scale_y); + CRNLIB_COMP(m_window_left); + CRNLIB_COMP(m_window_top); + CRNLIB_COMP(m_window_right); + CRNLIB_COMP(m_window_bottom); + CRNLIB_COMP(m_clamp_scale); + CRNLIB_COMP(m_clamp_width); + CRNLIB_COMP(m_clamp_height); + return true; +#undef CRNLIB_COMP + } + crn_uint32 m_size_of_obj; + + crn_mip_mode m_mode; + crn_mip_filter m_filter; + + crn_bool m_gamma_filtering; + float m_gamma; + + float m_blurriness; + + crn_uint32 m_max_levels; + crn_uint32 m_min_mip_size; + + 
crn_bool m_renormalize; + crn_bool m_tiled; + + crn_scale_mode m_scale_mode; + float m_scale_x; + float m_scale_y; + + crn_uint32 m_window_left; + crn_uint32 m_window_top; + crn_uint32 m_window_right; + crn_uint32 m_window_bottom; + + crn_bool m_clamp_scale; + crn_uint32 m_clamp_width; + crn_uint32 m_clamp_height; +}; + +// -------- High-level helper function definitions for CDN/DDS compression. + +#ifndef CRNLIB_MIN_ALLOC_ALIGNMENT +#define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 +#endif + +// Function to set an optional user provided memory allocation/reallocation/msize routines. +// By default, crnlib just uses malloc(), free(), etc. for all allocations. +typedef void* (*crn_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); +typedef size_t (*crn_msize_func)(void* p, void* pUser_data); +void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data); + +// Frees memory blocks allocated by crn_compress(), crn_decompress_crn_to_dds(), or crn_decompress_dds_to_images(). +void crn_free_block(void *pBlock); + +// Compresses a 32-bit/pixel texture to either: a regular DX9 DDS file, a "clustered" (or reduced entropy) DX9 DDS file, or a CRN file in memory. +// Input parameters: +// comp_params is the compression parameters struct, defined above. +// compressed_size will be set to the size of the returned memory block containing the output file. +// The returned block must be freed by calling crn_free_block(). +// *pActual_quality_level will be set to the actual quality level used to compress the image. May be NULL. +// *pActual_bitrate will be set to the output file's effective bitrate, possibly taking into account LZMA compression. May be NULL. +// Return value: +// The compressed file data, or NULL on failure. +// compressed_size will be set to the size of the returned memory buffer. 
+// Notes:
+// A "regular" DDS file is compressed using normal DXTn compression at the specified DXT quality level.
+// A "clustered" DDS file is compressed using clustered DXTn compression to either the target bitrate or the specified integer quality factor.
+// The output file is a standard DX9 format DDS file, except the compressor assumes you will be later losslessly compressing the DDS output file using the LZMA algorithm.
+// A texture is defined as an array of 1 or 6 "faces" (6 faces=cubemap), where each "face" consists of [1,cCRNMaxLevels] mipmap levels.
+// Mipmap levels are simple 32-bit 2D images with a pitch of width*sizeof(uint32), arranged in the usual raster order (top scanline first).
+// The image pixels may be grayscale (YYYX bytes in memory), grayscale/alpha (YYYA in memory), 24-bit (RGBX in memory), or 32-bit (RGBA) colors (where "X"=don't care).
+// RGB color data is generally assumed to be in the sRGB colorspace. If not, be sure to clear the "cCRNCompFlagPerceptual" flag in the crn_comp_params struct!
+void *crn_compress(const crn_comp_params &comp_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL);
+
+// Like the above function, except this function can also do things like generate mipmaps, and resize or crop the input texture before compression.
+// The actual operations performed are controlled by the crn_mipmap_params struct members.
+// Be sure to set the "m_gamma_filtering" member of crn_mipmap_params to false if the input texture is not sRGB.
+void *crn_compress(const crn_comp_params &comp_params, const crn_mipmap_params &mip_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL);
+
+// Transcodes an entire CRN file to DDS using the crn_decomp.h header file library to do most of the heavy lifting.
+// The output DDS file's format is guaranteed to be one of the DXTn formats in the crn_format enum.
+// This is a fast operation, because the CRN format is explicitly designed to be efficiently transcodable to DXTn.
+// For more control over decompression, see the lower-level helper functions in crn_decomp.h, which do not depend at all on crnlib.
+void *crn_decompress_crn_to_dds(const void *pCRN_file_data, crn_uint32 &file_size);
+
+// Decompresses an entire DDS file in any supported format to uncompressed 32-bit/pixel image(s).
+// See the crnlib::pixel_format enum in inc/dds_defs.h for a list of the supported DDS formats.
+// You are responsible for freeing each image block, either by calling crn_free_all_images() or manually calling crn_free_block() on each image pointer.
+struct crn_texture_desc
+{
+    crn_uint32 m_faces;
+    crn_uint32 m_width;
+    crn_uint32 m_height;
+    crn_uint32 m_levels;
+    crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format
+};
+bool crn_decompress_dds_to_images(const void *pDDS_file_data, crn_uint32 dds_file_size, crn_uint32 **ppImages, crn_texture_desc &tex_desc);
+
+// Frees all images allocated by crn_decompress_dds_to_images().
+void crn_free_all_images(crn_uint32 **ppImages, const crn_texture_desc &desc);
+
+// -------- crn_format related helper functions.
+
+// Returns the FOURCC format equivalent to the specified crn_format.
+crn_uint32 crn_get_format_fourcc(crn_format fmt);
+
+// Returns the crn_format's bits per texel.
+crn_uint32 crn_get_format_bits_per_texel(crn_format fmt);
+
+// Returns the crn_format's number of bytes per block.
+crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt);
+
+// Returns the non-swizzled, basic DXTn version of the specified crn_format.
+// This is the format you would supply to D3D or OpenGL.
+crn_format crn_get_fundamental_dxt_format(crn_format fmt);
+
+// -------- String helpers.
+
+// Converts a crn_file_type to a string.
+const char* crn_get_file_type_ext(crn_file_type file_type);
+
+// Converts a crn_format to a string.
+const char* crn_get_format_string(crn_format fmt);
+
+// Converts a crn_dxt_quality to a string.
+const char* crn_get_dxt_quality_string(crn_dxt_quality q);
+
+// -------- Low-level DXTn 4x4 block compressor API
+
+// crnlib's DXTn endpoint optimizer actually supports any number of source pixels (i.e. from 1 to thousands, not just 16),
+// but for simplicity this API only supports 4x4 texel blocks.
+typedef void *crn_block_compressor_context_t;
+
+// Create a DXTn block compressor.
+// This function only supports the basic/nonswizzled "fundamental" formats: DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX.
+// Avoid calling this multiple times if you intend on compressing many blocks, because it allocates some memory.
+crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params &params);
+
+// Compresses a block of 16 pixels to the destination DXTn block.
+// pDst_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others).
+// pPixels should be an array of 16 crn_uint32's. Each crn_uint32 must be r,g,b,a (r is always first) in memory.
+void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32 *pPixels, void *pDst_block);
+
+// Frees a DXTn block compressor.
+void crn_free_block_compressor(crn_block_compressor_context_t pContext);
+
+// Unpacks a compressed block to pDst_pixels.
+// pSrc_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others).
+// pDst_pixels should be an array of 16 crn_uint32's. Each uint32 will be r,g,b,a (r is always first) in memory.
+// crn_fmt should be one of the "fundamental" formats: DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX.
+// The various swizzled DXT5 formats (such as cCRNFmtDXT5_xGBR, etc.) will be unpacked as if they were plain DXT5.
+// Returns false if the crn_fmt is invalid.
+bool crn_decompress_block(const void *pSrc_block, crn_uint32 *pDst_pixels, crn_format crn_fmt); + +#endif // CRNLIB_H + +//------------------------------------------------------------------------------ +// +// crunch/crnlib uses a modified ZLIB license. Specifically, it's the same as zlib except that +// public credits for using the library are *required*. +// +// Copyright (c) 2010-2016 Richard Geldreich, Jr. All rights reserved. +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. +// +// 2. If you use this software in a product, this acknowledgment in the product +// documentation or credits is required: +// +// "Crunch Library Copyright (c) 2010-2016 Richard Geldreich, Jr." +// +// 3. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 4. This notice may not be removed or altered from any source distribution. +// +//------------------------------------------------------------------------------ diff --git a/Texture2DDecoder/endianness.h b/Texture2DDecoder/endianness.h new file mode 100644 index 0000000..7866e3b --- /dev/null +++ b/Texture2DDecoder/endianness.h @@ -0,0 +1,180 @@ +/* + * + * License Information + * + * endianness.h is derived from https://gist.github.com/jtbr/7a43e6281e6cca353b33ee501421860c + * The file is licensed under the MIT License shown below. 
+ * + * + * The MIT License (MIT) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef _ENDIANNESS_H +#define _ENDIANNESS_H + +#include +#include + +/* Detect platform endianness at compile time */ + +// If boost were available on all platforms, could use this instead to detect endianness +// #include + +// When available, these headers can improve platform endianness detection +#ifdef __has_include // C++17, supported as extension to C++11 in clang, GCC 5+, vs2015 +#if __has_include() +#include // gnu libc normally provides, linux +#elif __has_include() +#include //open bsd, macos +#elif __has_include() +#include // mingw, some bsd (not open/macos) +#elif __has_include() +#include // solaris +#endif +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN) || (defined(_BYTE_ORDER) && _BYTE_ORDER == _BIG_ENDIAN) || \ + (defined(BYTE_ORDER) && BYTE_ORDER == BIG_ENDIAN) || (defined(__sun) && defined(__SVR4) && defined(_BIG_ENDIAN)) || \ + defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || defined(_MIBSEB) || defined(__MIBSEB) || \ + defined(__MIBSEB__) || defined(_M_PPC) +#define __BIG_ENDIAN__ +#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || /* gcc */ \ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN) /* linux header */ || \ + (defined(_BYTE_ORDER) && _BYTE_ORDER == _LITTLE_ENDIAN) || \ + (defined(BYTE_ORDER) && BYTE_ORDER == LITTLE_ENDIAN) /* mingw header */ || \ + (defined(__sun) && defined(__SVR4) && defined(_LITTLE_ENDIAN)) || /* solaris */ \ + defined(__ARMEL__) || defined(__THUMBEL__) || defined(__AARCH64EL__) || defined(_MIPSEL) || defined(__MIPSEL) || \ + defined(__MIPSEL__) || defined(_M_IX86) || defined(_M_X64) || defined(_M_IA64) || /* msvc for intel processors */ \ + defined(_M_ARM) /* msvc code on arm executes in little endian mode */ +#define __LITTLE_ENDIAN__ +#endif +#endif + +#ifdef bswap16 +#undef 
bswap16 +#endif +#ifdef bswap32 +#undef bswap32 +#endif +#ifdef bswap64 +#undef bswap64 +#endif + +/* Define byte-swap functions, using fast processor-native built-ins where possible */ +// needs to be first because msvc doesn't short-circuit after failing defined(__has_builtin) +#if defined(_MSC_VER) +#define bswap16(x) _byteswap_ushort((x)) +#define bswap32(x) _byteswap_ulong((x)) +#define bswap64(x) _byteswap_uint64((x)) +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +#define bswap16(x) __builtin_bswap16((x)) +#define bswap32(x) __builtin_bswap32((x)) +#define bswap64(x) __builtin_bswap64((x)) +#elif defined(__has_builtin) && __has_builtin(__builtin_bswap64) +/* for clang; gcc 5 fails on this and && shortcircuit fails; must be after GCC check */ +#define bswap16(x) __builtin_bswap16((x)) +#define bswap32(x) __builtin_bswap32((x)) +#define bswap64(x) __builtin_bswap64((x)) +#else +/* even in this case, compilers often optimize by using native instructions */ +static inline uint16_t bswap16(uint16_t x) { + return (((x >> 8) & 0xffu) | ((x & 0xffu) << 8)); +} +static inline uint32_t bswap32(uint32_t x) { + return (((x & 0xff000000u) >> 24) | ((x & 0x00ff0000u) >> 8) | ((x & 0x0000ff00u) << 8) | + ((x & 0x000000ffu) << 24)); +} +static inline uint64_t bswap64(uint64_t x) { + return (((x & 0xff00000000000000ull) >> 56) | ((x & 0x00ff000000000000ull) >> 40) | + ((x & 0x0000ff0000000000ull) >> 24) | ((x & 0x000000ff00000000ull) >> 8) | + ((x & 0x00000000ff000000ull) << 8) | ((x & 0x0000000000ff0000ull) << 24) | + ((x & 0x000000000000ff00ull) << 40) | ((x & 0x00000000000000ffull) << 56)); +} +#endif + + +/* Defines network - host byte swaps as needed depending upon platform endianness */ +// note that network order is big endian) + +#if defined(__LITTLE_ENDIAN__) +#define ntoh16(x) bswap16((x)) +#define hton16(x) bswap16((x)) +#define ntoh32(x) bswap32((x)) +#define hton32(x) bswap32((x)) +#define ntoh64(x) bswap64((x)) +#define hton64(x) bswap64((x)) 
+#define lton16(x) (x) +#define lton32(x) (x) +#define lton64(x) (x) +#define ltonf(x) (x) +#define ltond(x) (x) +#define bton16(x) bswap16((x)) +#define bton32(x) bswap32((x)) +#define bton64(x) bswap64((x)) +#define btonf(x) htonf((x)) +#define btond(x) htond((x)) +#elif defined(__BIG_ENDIAN__) +#define ntoh16(x) (x) +#define hton16(x) (x) +#define ntoh32(x) (x) +#define hton32(x) (x) +#define ntoh64(x) (x) +#define hton64(x) (x) +#define bton16(x) (x) +#define bton32(x) (x) +#define bton64(x) (x) +#define btonf(x) (x) +#define btond(x) (x) +#define lton16(x) bswap16((x)) +#define lton32(x) bswap32((x)) +#define lton64(x) bswap64((x)) +#define ltonf(x) htonf((x)) +#define ltond(x) htond((x)) +#else +#warning "UNKNOWN Platform / endianness; network / host byte swaps not defined." +#endif + + +//! Convert 32-bit float from host to network byte order +static inline float htonf(float f) { +#ifdef __cplusplus + static_assert(sizeof(float) == sizeof(uint32_t), "Unexpected float format"); + uint32_t val = hton32(*(reinterpret_cast(&f))); + return *(reinterpret_cast(&val)); +#else + uint32_t val = hton32(*(const uint32_t *)(&f)); + return *((float *)(&val)); +#endif +} +#define ntohf(x) htonf((x)) + +//! 
Convert 64-bit double from host to network byte order +static inline double htond(double f) { +#ifdef __cplusplus + static_assert(sizeof(double) == sizeof(uint64_t), "Unexpected double format"); + uint64_t val = hton64(*(reinterpret_cast(&f))); + return *(reinterpret_cast(&val)); +#else + uint64_t val = hton64(*(const uint64_t *)(&f)); + return *((double *)(&val)); +#endif +} +#define ntohd(x) htond((x)) + +#endif //_ENDIANNESS_H diff --git a/Texture2DDecoder/etc.cpp b/Texture2DDecoder/etc.cpp new file mode 100644 index 0000000..6b08758 --- /dev/null +++ b/Texture2DDecoder/etc.cpp @@ -0,0 +1,443 @@ +#include "etc.h" +#include +#include +#include "color.h" + +const uint_fast8_t WriteOrderTable[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; +const uint_fast8_t WriteOrderTableRev[16] = {15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0}; +const uint_fast8_t Etc1ModifierTable[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, + {18, 60}, {24, 80}, {33, 106}, {47, 183}}; +const uint_fast8_t Etc2aModifierTable[2][8][2] = { + {{0, 8}, {0, 17}, {0, 29}, {0, 42}, {0, 60}, {0, 80}, {0, 106}, {0, 183}}, + {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}}}; +const uint_fast8_t Etc1SubblockTable[2][16] = {{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}}; +const uint_fast8_t Etc2DistanceTable[8] = {3, 6, 11, 16, 23, 32, 41, 64}; +const int_fast8_t Etc2AlphaModTable[16][8] = { + {-3, -6, -9, -15, 2, 5, 8, 14}, {-3, -7, -10, -13, 2, 6, 9, 12}, {-2, -5, -8, -13, 1, 4, 7, 12}, + {-2, -4, -6, -13, 1, 3, 5, 12}, {-3, -6, -8, -12, 2, 5, 7, 11}, {-3, -7, -9, -11, 2, 6, 8, 10}, + {-4, -7, -8, -11, 3, 6, 7, 10}, {-3, -5, -8, -11, 2, 4, 7, 10}, {-2, -6, -8, -10, 1, 5, 7, 9}, + {-2, -5, -8, -10, 1, 4, 7, 9}, {-2, -4, -8, -10, 1, 3, 7, 9}, {-2, -5, -7, -10, 1, 4, 6, 9}, + {-3, -4, -7, -10, 2, 3, 6, 9}, {-1, -2, -3, -10, 0, 1, 2, 9}, {-4, -6, -8, -9, 3, 5, 7, 8}, + {-3, -5, -7, -9, 2, 4, 6, 8}}; + 
+static inline uint_fast8_t clamp(const int n) { // Saturate n to the range [0, 255].
+    return n < 0 ? 0 : n > 255 ? 255 : n;
+}
+
+static inline uint32_t applicate_color(uint_fast8_t c[3], int_fast16_t m) { // Add m to each of the three channels of c, clamp, and pass them to color() with 255 as the fourth component.
+    return color(clamp(c[0] + m), clamp(c[1] + m), clamp(c[2] + m), 255);
+}
+
+static inline uint32_t applicate_color_alpha(uint_fast8_t c[3], int_fast16_t m, int transparent) { // Like applicate_color(), but the fourth component is 0 when transparent is non-zero.
+    return color(clamp(c[0] + m), clamp(c[1] + m), clamp(c[2] + m), transparent ? 0 : 255);
+}
+
+static inline uint32_t applicate_color_raw(uint_fast8_t c[3]) { // Pass c to color() unmodified, with 255 as the fourth component.
+    return color(c[0], c[1], c[2], 255);
+}
+
+static void decode_etc1_block(const uint8_t *data, uint32_t *outbuf) { // Decode one ETC1 block: reads data[0..7], writes 16 pixels to outbuf[0..15].
+    const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7}; // Table codewords (one per sub-block)
+    const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1];
+    uint_fast8_t c[2][3];
+    if (data[3] & 2) {
+        // diff bit == 1: c[0] comes from the top 5 bits of each byte, c[1] = c[0] plus a signed 3-bit delta (pre-scaled by 8)
+        c[0][0] = data[0] & 0xf8;
+        c[0][1] = data[1] & 0xf8;
+        c[0][2] = data[2] & 0xf8;
+        c[1][0] = c[0][0] + (data[0] << 3 & 0x18) - (data[0] << 3 & 0x20);
+        c[1][1] = c[0][1] + (data[1] << 3 & 0x18) - (data[1] << 3 & 0x20);
+        c[1][2] = c[0][2] + (data[2] << 3 & 0x18) - (data[2] << 3 & 0x20);
+        c[0][0] |= c[0][0] >> 5;
+        c[0][1] |= c[0][1] >> 5;
+        c[0][2] |= c[0][2] >> 5;
+        c[1][0] |= c[1][0] >> 5;
+        c[1][1] |= c[1][1] >> 5;
+        c[1][2] |= c[1][2] >> 5;
+    } else {
+        // diff bit == 0: high nibble of each byte -> c[0], low nibble -> c[1], each replicated into both nibbles
+        c[0][0] = (data[0] & 0xf0) | data[0] >> 4;
+        c[1][0] = (data[0] & 0x0f) | data[0] << 4;
+        c[0][1] = (data[1] & 0xf0) | data[1] >> 4;
+        c[1][1] = (data[1] & 0x0f) | data[1] << 4;
+        c[0][2] = (data[2] & 0xf0) | data[2] >> 4;
+        c[1][2] = (data[2] & 0x0f) | data[2] << 4;
+    }
+
+    uint_fast16_t j = data[6] << 8 | data[7]; // less significant pixel index bits
+    uint_fast16_t k = data[4] << 8 | data[5]; // more significant pixel index bits
+    for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) {
+        uint_fast8_t s = table[i];
+        uint_fast8_t m = Etc1ModifierTable[code[s]][j & 1];
+        outbuf[WriteOrderTable[i]] = applicate_color(c[s], k & 1 ? -m : m);
+    }
+}
+
+static void decode_etc2_block(const uint8_t *data, uint32_t *outbuf) { // Decode one ETC2 RGB block: reads data[0..7], writes 16 pixels (fourth component always 255) to outbuf[0..15].
+    uint_fast16_t j = data[6] << 8 | data[7]; // 15 -> 0
+    uint_fast32_t k = data[4] << 8 | data[5]; // 31 -> 16
+    uint_fast8_t c[3][3] = {};
+
+    if (data[3] & 2) {
+        // diff bit == 1: an out-of-range base+delta sum in R, G or B selects T, H or planar mode respectively
+        uint_fast8_t r = data[0] & 0xf8;
+        int_fast16_t dr = (data[0] << 3 & 0x18) - (data[0] << 3 & 0x20);
+        uint_fast8_t g = data[1] & 0xf8;
+        int_fast16_t dg = (data[1] << 3 & 0x18) - (data[1] << 3 & 0x20);
+        uint_fast8_t b = data[2] & 0xf8;
+        int_fast16_t db = (data[2] << 3 & 0x18) - (data[2] << 3 & 0x20);
+        if (r + dr < 0 || r + dr > 255) {
+            // T mode: palette is c[0], c[1] + d, c[1], c[1] - d
+            c[0][0] = (data[0] << 3 & 0xc0) | (data[0] << 4 & 0x30) | (data[0] >> 1 & 0xc) | (data[0] & 3);
+            c[0][1] = (data[1] & 0xf0) | data[1] >> 4;
+            c[0][2] = (data[1] & 0x0f) | data[1] << 4;
+            c[1][0] = (data[2] & 0xf0) | data[2] >> 4;
+            c[1][1] = (data[2] & 0x0f) | data[2] << 4;
+            c[1][2] = (data[3] & 0xf0) | data[3] >> 4;
+            const uint_fast8_t d = Etc2DistanceTable[(data[3] >> 1 & 6) | (data[3] & 1)];
+            uint_fast32_t color_set[4] = {applicate_color_raw(c[0]), applicate_color(c[1], d),
+                                          applicate_color_raw(c[1]), applicate_color(c[1], -d)};
+            k <<= 1;
+            for (int i = 0; i < 16; i++, j >>= 1, k >>= 1)
+                outbuf[WriteOrderTable[i]] = color_set[(k & 2) | (j & 1)];
+        } else if (g + dg < 0 || g + dg > 255) {
+            // H mode: palette is c[0] + d, c[0] - d, c[1] + d, c[1] - d; the low bit of the distance index comes from comparing c[0] with c[1]
+            c[0][0] = (data[0] << 1 & 0xf0) | (data[0] >> 3 & 0xf);
+            c[0][1] = (data[0] << 5 & 0xe0) | (data[1] & 0x10);
+            c[0][1] |= c[0][1] >> 4;
+            c[0][2] = (data[1] & 8) | (data[1] << 1 & 6) | data[2] >> 7;
+            c[0][2] |= c[0][2] << 4;
+            c[1][0] = (data[2] << 1 & 0xf0) | (data[2] >> 3 & 0xf);
+            c[1][1] = (data[2] << 5 & 0xe0) | (data[3] >> 3 & 0x10);
+            c[1][1] |= c[1][1] >> 4;
+            c[1][2] = (data[3] << 1 & 0xf0) | (data[3] >> 3 & 0xf);
+            uint_fast8_t d = (data[3] & 4) | (data[3] << 1 & 2);
+            if (c[0][0] > c[1][0] ||
+                (c[0][0] == c[1][0] && (c[0][1] > c[1][1] || (c[0][1] == c[1][1] && c[0][2] >= c[1][2]))))
+                ++d;
+            d = Etc2DistanceTable[d];
+            uint_fast32_t color_set[4] = {applicate_color(c[0], d), applicate_color(c[0], -d), applicate_color(c[1], d),
+                                          applicate_color(c[1], -d)};
+            k <<= 1;
+            for (int i = 0; i < 16; i++, j >>= 1, k >>= 1)
+                outbuf[WriteOrderTable[i]] = color_set[(k & 2) | (j & 1)];
+        } else if (b + db < 0 || b + db > 255) {
+            // planar mode: each pixel is interpolated from c[0] (origin), c[1] (x direction) and c[2] (y direction); outbuf is written in row-major order here
+            c[0][0] = (data[0] << 1 & 0xfc) | (data[0] >> 5 & 3);
+            c[0][1] = (data[0] << 7 & 0x80) | (data[1] & 0x7e) | (data[0] & 1);
+            c[0][2] = (data[1] << 7 & 0x80) | (data[2] << 2 & 0x60) | (data[2] << 3 & 0x18) | (data[3] >> 5 & 4);
+            c[0][2] |= c[0][2] >> 6;
+            c[1][0] = (data[3] << 1 & 0xf8) | (data[3] << 2 & 4) | (data[3] >> 5 & 3);
+            c[1][1] = (data[4] & 0xfe) | data[4] >> 7;
+            c[1][2] = (data[4] << 7 & 0x80) | (data[5] >> 1 & 0x7c);
+            c[1][2] |= c[1][2] >> 6;
+            c[2][0] = (data[5] << 5 & 0xe0) | (data[6] >> 3 & 0x1c) | (data[5] >> 1 & 3);
+            c[2][1] = (data[6] << 3 & 0xf8) | (data[7] >> 5 & 0x6) | (data[6] >> 4 & 1);
+            c[2][2] = data[7] << 2 | (data[7] >> 4 & 3);
+            for (int y = 0, i = 0; y < 4; y++) {
+                for (int x = 0; x < 4; x++, i++) {
+                    uint8_t r = clamp((x * (c[1][0] - c[0][0]) + y * (c[2][0] - c[0][0]) + 4 * c[0][0] + 2) >> 2);
+                    uint8_t g = clamp((x * (c[1][1] - c[0][1]) + y * (c[2][1] - c[0][1]) + 4 * c[0][1] + 2) >> 2);
+                    uint8_t b = clamp((x * (c[1][2] - c[0][2]) + y * (c[2][2] - c[0][2]) + 4 * c[0][2] + 2) >> 2);
+                    outbuf[i] = color(r, g, b, 255);
+                }
+            }
+        } else {
+            // differential mode: same per-pixel decoding loop as decode_etc1_block with the diff bit set
+            const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7};
+            const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1];
+            c[0][0] = r | r >> 5;
+            c[0][1] = g | g >> 5;
+            c[0][2] = b | b >> 5;
+            c[1][0] = r + dr;
+            c[1][1] = g + dg;
+            c[1][2] = b + db;
+            c[1][0] |= c[1][0] >> 5;
+            c[1][1] |= c[1][1] >> 5;
+            c[1][2] |= c[1][2] >> 5;
+            for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) {
+                uint_fast8_t s = table[i];
+                uint_fast8_t m = Etc1ModifierTable[code[s]][j & 1];
+                outbuf[WriteOrderTable[i]] = applicate_color(c[s], k & 1 ? -m : m);
+            }
+        }
+    } else {
+        // individual (diff bit == 0): two independent 4-bit-per-channel colors, as in decode_etc1_block
+        const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7};
+        const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1];
+        c[0][0] = (data[0] & 0xf0) | data[0] >> 4;
+        c[1][0] = (data[0] & 0x0f) | data[0] << 4;
+        c[0][1] = (data[1] & 0xf0) | data[1] >> 4;
+        c[1][1] = (data[1] & 0x0f) | data[1] << 4;
+        c[0][2] = (data[2] & 0xf0) | data[2] >> 4;
+        c[1][2] = (data[2] & 0x0f) | data[2] << 4;
+        for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) {
+            uint_fast8_t s = table[i];
+            uint_fast8_t m = Etc1ModifierTable[code[s]][j & 1];
+            outbuf[WriteOrderTable[i]] = applicate_color(c[s], k & 1 ? -m : m);
+        }
+    }
+}
+
+static void decode_etc2a1_block(const uint8_t *data, uint32_t *outbuf) {
+    uint_fast16_t j = data[6] << 8 | data[7]; // 15 -> 0
+    uint_fast32_t k = data[4] << 8 | data[5]; // 31 -> 16
+    uint_fast8_t c[3][3] = {};
+
+    int obaq = data[3] >> 1 & 1;
+
+    // diff bit == 1
+    uint_fast8_t r = data[0] & 0xf8;
+    int_fast16_t dr = (data[0] << 3 & 0x18) - (data[0] << 3 & 0x20);
+    uint_fast8_t g = data[1] & 0xf8;
+    int_fast16_t dg = (data[1] << 3 & 0x18) - (data[1] << 3 & 0x20);
+    uint_fast8_t b = data[2] & 0xf8;
+    int_fast16_t db = (data[2] << 3 & 0x18) - (data[2] << 3 & 0x20);
+    if (r + dr < 0 || r + dr > 255) {
+        // T
+        c[0][0] = (data[0] << 3 & 0xc0) | (data[0] << 4 & 0x30) | (data[0] >> 1 & 0xc) | (data[0] & 3);
+        c[0][1] = (data[1] & 0xf0) | data[1] >> 4;
+        c[0][2] = (data[1] & 0x0f) | data[1] << 4;
+        c[1][0] = (data[2] & 0xf0) | data[2] >> 4;
+        c[1][1] = (data[2] & 0x0f) | data[2] << 4;
+        c[1][2] = (data[3] & 0xf0) | data[3] >> 4;
+        const uint_fast8_t d = Etc2DistanceTable[(data[3] >> 1 & 6) | (data[3] & 1)];
+        uint_fast32_t color_set[4] = {applicate_color_raw(c[0]), applicate_color(c[1], d), applicate_color_raw(c[1]),
+                                      applicate_color(c[1], -d)};
+        k <<= 1;
+        for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) {
+            int index = (k & 2) | (j & 1);
+            outbuf[WriteOrderTable[i]] = color_set[index];
+            if (!obaq &&
index == 2) + outbuf[WriteOrderTable[i]] &= TRANSPARENT_MASK; + } + } else if (g + dg < 0 || g + dg > 255) { + // H + c[0][0] = (data[0] << 1 & 0xf0) | (data[0] >> 3 & 0xf); + c[0][1] = (data[0] << 5 & 0xe0) | (data[1] & 0x10); + c[0][1] |= c[0][1] >> 4; + c[0][2] = (data[1] & 8) | (data[1] << 1 & 6) | data[2] >> 7; + c[0][2] |= c[0][2] << 4; + c[1][0] = (data[2] << 1 & 0xf0) | (data[2] >> 3 & 0xf); + c[1][1] = (data[2] << 5 & 0xe0) | (data[3] >> 3 & 0x10); + c[1][1] |= c[1][1] >> 4; + c[1][2] = (data[3] << 1 & 0xf0) | (data[3] >> 3 & 0xf); + uint_fast8_t d = (data[3] & 4) | (data[3] << 1 & 2); + if (c[0][0] > c[1][0] || + (c[0][0] == c[1][0] && (c[0][1] > c[1][1] || (c[0][1] == c[1][1] && c[0][2] >= c[1][2])))) + ++d; + d = Etc2DistanceTable[d]; + uint_fast32_t color_set[4] = {applicate_color(c[0], d), applicate_color(c[0], -d), applicate_color(c[1], d), + applicate_color(c[1], -d)}; + k <<= 1; + for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) { + int index = (k & 2) | (j & 1); + outbuf[WriteOrderTable[i]] = color_set[index]; + if (!obaq && index == 2) + outbuf[WriteOrderTable[i]] &= TRANSPARENT_MASK; + } + } else if (b + db < 0 || b + db > 255) { + // planar + c[0][0] = (data[0] << 1 & 0xfc) | (data[0] >> 5 & 3); + c[0][1] = (data[0] << 7 & 0x80) | (data[1] & 0x7e) | (data[0] & 1); + c[0][2] = (data[1] << 7 & 0x80) | (data[2] << 2 & 0x60) | (data[2] << 3 & 0x18) | (data[3] >> 5 & 4); + c[0][2] |= c[0][2] >> 6; + c[1][0] = (data[3] << 1 & 0xf8) | (data[3] << 2 & 4) | (data[3] >> 5 & 3); + c[1][1] = (data[4] & 0xfe) | data[4] >> 7; + c[1][2] = (data[4] << 7 & 0x80) | (data[5] >> 1 & 0x7c); + c[1][2] |= c[1][2] >> 6; + c[2][0] = (data[5] << 5 & 0xe0) | (data[6] >> 3 & 0x1c) | (data[5] >> 1 & 3); + c[2][1] = (data[6] << 3 & 0xf8) | (data[7] >> 5 & 0x6) | (data[6] >> 4 & 1); + c[2][2] = data[7] << 2 | (data[7] >> 4 & 3); + for (int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 4; x++, i++) { + uint8_t r = clamp((x * (c[1][0] - c[0][0]) + y * (c[2][0] - c[0][0]) + 4 
* c[0][0] + 2) >> 2); + uint8_t g = clamp((x * (c[1][1] - c[0][1]) + y * (c[2][1] - c[0][1]) + 4 * c[0][1] + 2) >> 2); + uint8_t b = clamp((x * (c[1][2] - c[0][2]) + y * (c[2][2] - c[0][2]) + 4 * c[0][2] + 2) >> 2); + outbuf[i] = color(r, g, b, 255); + } + } + } else { + // differential + const uint_fast8_t code[2] = {data[3] >> 5, data[3] >> 2 & 7}; + const uint_fast8_t *table = Etc1SubblockTable[data[3] & 1]; + c[0][0] = r | r >> 5; + c[0][1] = g | g >> 5; + c[0][2] = b | b >> 5; + c[1][0] = r + dr; + c[1][1] = g + dg; + c[1][2] = b + db; + c[1][0] |= c[1][0] >> 5; + c[1][1] |= c[1][1] >> 5; + c[1][2] |= c[1][2] >> 5; + for (int i = 0; i < 16; i++, j >>= 1, k >>= 1) { + uint_fast8_t s = table[i]; + uint_fast8_t m = Etc2aModifierTable[obaq][code[s]][j & 1]; + outbuf[WriteOrderTable[i]] = applicate_color_alpha(c[s], k & 1 ? -m : m, !obaq && (k & 1) && !(j & 1)); + } + } +} + +static void decode_etc2a8_block(const uint8_t *data, uint32_t *outbuf) { + if (data[1] & 0xf0) { + // multiplier != 0 + const uint_fast8_t multiplier = data[1] >> 4; + const int_fast8_t *table = Etc2AlphaModTable[data[1] & 0xf]; + uint_fast64_t l = bton64(*(uint64_t*)data); + for (int i = 0; i < 16; i++, l >>= 3) + ((uint8_t *)(outbuf + WriteOrderTableRev[i]))[3] = clamp(data[0] + multiplier * table[l & 7]); + } else { + // multiplier == 0 (always same as base codeword) + for (int i = 0; i < 16; i++, outbuf++) + ((uint8_t *)outbuf)[3] = data[0]; + } +} + +static void decode_eac_block(const uint8_t *data, int color, uint32_t *outbuf) { + uint_fast8_t multiplier = data[1] >> 1 & 0x78; + if (multiplier == 0) + multiplier = 1; + const int_fast8_t *table = Etc2AlphaModTable[data[1] & 0xf]; + uint_fast64_t l = bton64(*(uint64_t*)data); + for (int i = 0; i < 16; i++, l >>= 3) { + int_fast16_t val = data[0] * 8 + multiplier * table[l & 7] + 4; + ((uint8_t *)(outbuf + WriteOrderTableRev[i]))[color] = val < 0 ? 0 : val >= 2048 ? 
0xff : val >> 3; + } +} + +static void decode_eac_signed_block(const uint8_t *data, int color, uint32_t *outbuf) { + int8_t base = (int8_t)data[0]; + uint_fast8_t multiplier = data[1] >> 1 & 0x78; + if (multiplier == 0) + multiplier = 1; + const int_fast8_t *table = Etc2AlphaModTable[data[1] & 0xf]; + uint_fast64_t l = bton64(*(uint64_t*)data); + for (int i = 0; i < 16; i++, l >>= 3) { + int_fast16_t val = base * 8 + multiplier * table[l & 7] + 1023; + ((uint8_t *)(outbuf + WriteOrderTableRev[i]))[color] = val < 0 ? 0 : val >= 2048 ? 0xff : val >> 3; + } +} + +int decode_etc1(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + decode_etc1_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_etc2(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + decode_etc2_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_etc2a1(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + decode_etc2a1_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_etc2a8(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + for (long by = 0; by < num_blocks_y; by++) { + for 
(long bx = 0; bx < num_blocks_x; bx++, data += 16) { + decode_etc2_block(data + 8, buffer); + decode_etc2a8_block(data, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_eacr(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + uint32_t base_buffer[16]; + for (int i = 0; i < 16; i++) + base_buffer[i] = color(0, 0, 0, 255); + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + memcpy(buffer, base_buffer, sizeof(buffer)); + decode_eac_block(data, 2, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_eacr_signed(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + uint32_t base_buffer[16]; + for (int i = 0; i < 16; i++) + base_buffer[i] = color(0, 0, 0, 255); + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 8) { + memcpy(buffer, base_buffer, sizeof(buffer)); + decode_eac_signed_block(data, 2, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_eacrg(const uint8_t *data, const long w, const long h, uint32_t *image) { + long num_blocks_x = (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + uint32_t buffer[16]; + uint32_t base_buffer[16]; + for (int i = 0; i < 16; i++) + base_buffer[i] = color(0, 0, 0, 255); + for (long by = 0; by < num_blocks_y; by++) { + for (long bx = 0; bx < num_blocks_x; bx++, data += 16) { + memcpy(buffer, base_buffer, sizeof(buffer)); + decode_eac_block(data, 2, buffer); + decode_eac_block(data + 8, 1, buffer); + copy_block_buffer(bx, by, w, h, 4, 4, buffer, image); + } + } + return 1; +} + +int decode_eacrg_signed(const uint8_t *data, const long w, const long h, uint32_t *image) 
{
+    long num_blocks_x = (w + 3) / 4;
+    long num_blocks_y = (h + 3) / 4;
+    uint32_t buffer[16];
+    uint32_t base_buffer[16];
+    for (int i = 0; i < 16; i++)
+        base_buffer[i] = color(0, 0, 0, 255);
+    for (long by = 0; by < num_blocks_y; by++) {
+        for (long bx = 0; bx < num_blocks_x; bx++, data += 16) {
+            memcpy(buffer, base_buffer, sizeof(buffer));
+            decode_eac_signed_block(data, 2, buffer);
+            decode_eac_signed_block(data + 8, 1, buffer);
+            copy_block_buffer(bx, by, w, h, 4, 4, buffer, image);
+        }
+    }
+    return 1;
+}
diff --git a/Texture2DDecoder/etc.h b/Texture2DDecoder/etc.h
new file mode 100644
index 0000000..5546b8a
--- /dev/null
+++ b/Texture2DDecoder/etc.h
@@ -0,0 +1,19 @@
+#ifndef ETC_H
+#define ETC_H
+
+#include <stdint.h>
+
+/*
+ * ETC/EAC texture decoders defined in etc.cpp. Arguments, in order: compressed input data, width, height,
+ * output image buffer. The input is consumed one 4x4-pixel block at a time; every decoder returns 1.
+ */
+int decode_etc1(const uint8_t *, const long, const long, uint32_t *);
+int decode_etc2(const uint8_t *, const long, const long, uint32_t *);
+int decode_etc2a1(const uint8_t *, const long, const long, uint32_t *);
+int decode_etc2a8(const uint8_t *, const long, const long, uint32_t *);
+int decode_eacr(const uint8_t *, const long, const long, uint32_t *);
+int decode_eacr_signed(const uint8_t *, const long, const long, uint32_t *);
+int decode_eacrg(const uint8_t *, const long, const long, uint32_t *);
+int decode_eacrg_signed(const uint8_t *, const long, const long, uint32_t *);
+
+#endif /* end of include guard: ETC_H */
diff --git a/Texture2DDecoder/fp16.h b/Texture2DDecoder/fp16.h
new file mode 100644
index 0000000..b7f0c1d
--- /dev/null
+++ b/Texture2DDecoder/fp16.h
@@ -0,0 +1,36 @@
+#pragma once
+#ifndef FP16_H
+#define FP16_H
+
+#include "fp16/fp16.h"
+
+#endif /* FP16_H */
+
+/*
+ *
+ * License Information
+ *
+ * FP16 library is derived from https://github.com/Maratyszcza/FP16.
+ * The library is licensed under the MIT License shown below.
+ *
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2017 Facebook Inc.
+ * Copyright (c) 2017 Georgia Institute of Technology + * Copyright 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
diff --git a/Texture2DDecoder/fp16/bitcasts.h b/Texture2DDecoder/fp16/bitcasts.h
new file mode 100644
index 0000000..26a755c
--- /dev/null
+++ b/Texture2DDecoder/fp16/bitcasts.h
@@ -0,0 +1,80 @@
+#pragma once
+#ifndef FP16_BITCASTS_H
+#define FP16_BITCASTS_H
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+    #include <cstdint>
+#elif !defined(__OPENCL_VERSION__)
+    #include <stdint.h>
+#endif
+
+
+/* Reinterpret the 32-bit pattern w as a float (bit cast, no numeric conversion). */
+static inline float fp32_from_bits(uint32_t w) {
+#if defined(__OPENCL_VERSION__)
+    return as_float(w);
+#elif defined(__CUDA_ARCH__)
+    return __uint_as_float((unsigned int) w);
+#elif defined(__INTEL_COMPILER)
+    return _castu32_f32(w);
+#else
+    union {
+        uint32_t as_bits;
+        float as_value;
+    } fp32 = { w };
+    return fp32.as_value;
+#endif
+}
+
+/* Return the 32-bit pattern that represents the float f (bit cast, no numeric conversion). */
+static inline uint32_t fp32_to_bits(float f) {
+#if defined(__OPENCL_VERSION__)
+    return as_uint(f);
+#elif defined(__CUDA_ARCH__)
+    return (uint32_t) __float_as_uint(f);
+#elif defined(__INTEL_COMPILER)
+    return _castf32_u32(f);
+#else
+    union {
+        float as_value;
+        uint32_t as_bits;
+    } fp32 = { f };
+    return fp32.as_bits;
+#endif
+}
+
+/* Reinterpret the 64-bit pattern w as a double (bit cast, no numeric conversion). */
+static inline double fp64_from_bits(uint64_t w) {
+#if defined(__OPENCL_VERSION__)
+    return as_double(w);
+#elif defined(__CUDA_ARCH__)
+    return __longlong_as_double((long long) w);
+#elif defined(__INTEL_COMPILER)
+    return _castu64_f64(w);
+#else
+    union {
+        uint64_t as_bits;
+        double as_value;
+    } fp64 = { w };
+    return fp64.as_value;
+#endif
+}
+
+/* Return the 64-bit pattern that represents the double f (bit cast, no numeric conversion). */
+static inline uint64_t fp64_to_bits(double f) {
+#if defined(__OPENCL_VERSION__)
+    return as_ulong(f);
+#elif defined(__CUDA_ARCH__)
+    return (uint64_t) __double_as_longlong(f);
+#elif defined(__INTEL_COMPILER)
+    return _castf64_u64(f);
+#else
+    union {
+        double as_value;
+        uint64_t as_bits;
+    } fp64 = { f };
+    return fp64.as_bits;
+#endif
+}
+
+#endif /* FP16_BITCASTS_H */
diff --git a/Texture2DDecoder/fp16/fp16.h b/Texture2DDecoder/fp16/fp16.h
new file mode 100644
index 0000000..642f2f8
--- /dev/null
+++ b/Texture2DDecoder/fp16/fp16.h
@@ -0,0 +1,451 @@
+#pragma once
+#ifndef FP16_FP16_H
+#define FP16_FP16_H
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+    #include <cstdint>
+    #include <cmath>
+#elif !defined(__OPENCL_VERSION__)
+    #include <stdint.h>
+    #include <math.h>
+#endif
+
+#ifdef _MSC_VER
+    #include <intrin.h>
+#endif
+
+#include "fp16/bitcasts.h"
+
+
+/*
+ * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format, in bit representation.
+ *
+ * @note The implementation doesn't use any floating-point operations.
+ */
+static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
+    /*
+     * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+     *      +---+-----+------------+-------------------+
+     *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+     *      +---+-----+------------+-------------------+
+     * Bits  31  26-30    16-25            0-15
+     *
+     * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+     */
+    const uint32_t w = (uint32_t) h << 16;
+    /*
+     * Extract the sign of the input number into the high bit of the 32-bit word:
+     *
+     *      +---+----------------------------------+
+     *      | S |0000000 00000000 00000000 00000000|
+     *      +---+----------------------------------+
+     * Bits  31                 0-30
+     */
+    const uint32_t sign = w & UINT32_C(0x80000000);
+    /*
+     * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word:
+     *
+     *      +---+-----+------------+-------------------+
+     *      | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+     *      +---+-----+------------+-------------------+
+     * Bits  31  26-30    16-25            0-15
+     */
+    const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF);
+    /*
+     * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized.
+     * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one.
+     * In this case renorm_shift == 0. If the number is denormalized, renorm_shift > 0. Note that if we shift
+     * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the
+     * biased exponent into 1, and making mantissa normalized (i.e. without leading 1).
+     */
+#ifdef _MSC_VER
+    unsigned long nonsign_bsr;
+    _BitScanReverse(&nonsign_bsr, (unsigned long) nonsign | 1); /* "| 1": the index is undefined for a zero mask; bit 0 of nonsign is always clear */
+    uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31;
+#else
+    uint32_t renorm_shift = __builtin_clz(nonsign | 1); /* "| 1": clz(0) is undefined; bit 0 of nonsign is always clear */
+#endif
+    renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0;
+    /*
+     * Iff half-precision number has a biased exponent of 0x1F, the addition overflows it into bit 31,
+     * and the subsequent shift turns the high 9 bits into 1. Thus
+     *   inf_nan_mask ==
+     *                   0x7F800000 if the half-precision number had a biased exponent of 0x1F (i.e. was NaN or infinity)
+     *                   0x00000000 otherwise
+     */
+    const int32_t inf_nan_mask = ((int32_t) (nonsign + 0x04000000) >> 8) & INT32_C(0x7F800000);
+    /*
+     * Iff nonsign is 0, nonsign - 1 wraps around to 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0.
+     * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus
+     *   zero_mask ==
+     *                0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h)
+     *                0x00000000 otherwise
+     */
+    const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31;
+    /*
+     * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal)
+     * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa
+     *    shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number.
+     * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the difference in exponent bias
+     *    (0x7F for single-precision number less 0xF for half-precision number).
+     * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift
+     *    is less than 0x70, this can be combined with step 3.
+     * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the input was NaN or infinity.
+     * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero.
+     * 7. Combine with the sign of the input number.
+     */
+    return sign | ((((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) | inf_nan_mask) & ~zero_mask);
+}
+
+/*
+ * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline float fp16_ieee_to_fp32_value(uint16_t h) {
+    /*
+     * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+     *      +---+-----+------------+-------------------+
+     *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+     *      +---+-----+------------+-------------------+
+     * Bits  31  26-30    16-25            0-15
+     *
+     * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+     */
+    const uint32_t w = (uint32_t) h << 16;
+    /*
+     * Extract the sign of the input number into the high bit of the 32-bit word:
+     *
+     *      +---+----------------------------------+
+     *      | S |0000000 00000000 00000000 00000000|
+     *      +---+----------------------------------+
+     * Bits  31                 0-30
+     */
+    const uint32_t sign = w & UINT32_C(0x80000000);
+    /*
+     * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word:
+     *
+     *      +-----+------------+---------------------+
+     *      |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000|
+     *      +-----+------------+---------------------+
+     * Bits  27-31    17-26            0-16
+     */
+    const uint32_t two_w = w + w;
+
+    /*
+     * Shift exponent and mantissa into bits 23-27 and bits 13-22 so they become exponent and mantissa
+     * of a single-precision floating-point number:
+     *
+     *       S|Exponent |          Mantissa
+     *      +-+---+-----+------------+----------------+
+     *      |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000|
+     *      +-+---+-----+------------+----------------+
+     * Bits   | 23-31   |           0-22
+     *
+     * Next, there are some adjustments to the exponent:
+     * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision
+     *   formats (0x7F - 0xF = 0x70)
+     * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number.
+     *   Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent
+     *   of the single-precision output must be 0xFF (max possible value). We do this correction in two steps:
+     *   - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested
+     *     by the difference in the exponent bias (see above).
+     *   - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of
+     *     exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias.
+     *     The floating-point multiplication hardware would ensure that Inf and NaN would retain their value on at least
+     *     partially IEEE754-compliant implementations.
+     *
+     * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not
+     * operate on denormal inputs, and do not produce denormal results.
+     */
+    const uint32_t exp_offset = UINT32_C(0xE0) << 23;
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    const float exp_scale = 0x1.0p-112f;
+#else
+    const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
+#endif
+    const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
+
+    /*
+     * Convert denormalized half-precision inputs into single-precision results (always normalized).
+     * Zero inputs are also handled here.
+     *
+     * In a denormalized number the biased exponent is zero, and mantissa has non-zero bits.
+     * First, we shift mantissa into bits 0-9 of the 32-bit word.
+     *
+     *                  zeros           |  mantissa
+     *      +---------------------------+------------+
+     *      |0000 0000 0000 0000 0000 00|MM MMMM MMMM|
+     *      +---------------------------+------------+
+     * Bits             10-31                0-9
+     *
+     * Now, remember that denormalized half-precision numbers are represented as:
+     *    FP16 = mantissa * 2**(-24).
+     * The trick is to construct a normalized single-precision number with the same mantissa as the half-precision input
+     * and with an exponent which would scale the corresponding mantissa bits to 2**(-24).
+     * A normalized single-precision floating-point number is represented as:
+     *    FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
+     * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
+     * number causes a change of the constructed single-precision number by 2**(-24), i.e. the same amount.
+     *
+     * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
+     * is zero, the constructed single-precision number has the value of
+     *    FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5
+     * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of
+     * the input half-precision number.
+     */
+    const uint32_t magic_mask = UINT32_C(126) << 23;
+    const float magic_bias = 0.5f;
+    const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+
+    /*
+     * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the
+     *   input exponent. The variable two_w contains input exponent in bits 27-31, therefore if it's smaller than 2**27, the
+     *   input is either a denormal number, or zero.
+     * - Combine the result of conversion of exponent and mantissa with the sign of the input number.
+     */
+    const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+    const uint32_t result = sign |
+        (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
+    return fp32_from_bits(result);
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
+ * IEEE half-precision format, in bit representation.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline uint16_t fp16_ieee_from_fp32_value(float f) {
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+    const float scale_to_inf = 0x1.0p+112f;
+    const float scale_to_zero = 0x1.0p-110f;
+#else
+    const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
+    const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
+#endif
+    float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
+
+    const uint32_t w = fp32_to_bits(f);
+    const uint32_t shl1_w = w + w;
+    const uint32_t sign = w & UINT32_C(0x80000000);
+    uint32_t bias = shl1_w & UINT32_C(0xFF000000);
+    if (bias < UINT32_C(0x71000000)) {
+        bias = UINT32_C(0x71000000);
+    }
+
+    base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
+    const uint32_t bits = fp32_to_bits(base);
+    const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
+    const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
+    const uint32_t nonsign = exp_bits + mantissa_bits;
+    return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
+}
+
+/*
+ * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format, in bit representation.
+ *
+ * @note The implementation doesn't use any floating-point operations.
+ */
+static inline uint32_t fp16_alt_to_fp32_bits(uint16_t h) {
+    /*
+     * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+     *      +---+-----+------------+-------------------+
+     *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+     *      +---+-----+------------+-------------------+
+     * Bits  31  26-30    16-25            0-15
+     *
+     * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+     */
+    const uint32_t w = (uint32_t) h << 16;
+    /*
+     * Extract the sign of the input number into the high bit of the 32-bit word:
+     *
+     *      +---+----------------------------------+
+     *      | S |0000000 00000000 00000000 00000000|
+     *      +---+----------------------------------+
+     * Bits  31                 0-30
+     */
+    const uint32_t sign = w & UINT32_C(0x80000000);
+    /*
+     * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word:
+     *
+     *      +---+-----+------------+-------------------+
+     *      | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+     *      +---+-----+------------+-------------------+
+     * Bits  31  26-30    16-25            0-15
+     */
+    const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF);
+    /*
+     * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized.
+     * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one.
+     * In this case renorm_shift == 0. If the number is denormalized, renorm_shift > 0. Note that if we shift
+     * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the
+     * biased exponent into 1, and making mantissa normalized (i.e. without leading 1).
+     */
+#ifdef _MSC_VER
+    unsigned long nonsign_bsr;
+    _BitScanReverse(&nonsign_bsr, (unsigned long) nonsign | 1); /* "| 1": the index is undefined for a zero mask; bit 0 of nonsign is always clear */
+    uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31;
+#else
+    uint32_t renorm_shift = __builtin_clz(nonsign | 1); /* "| 1": clz(0) is undefined; bit 0 of nonsign is always clear */
+#endif
+    renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0;
+    /*
+     * Iff nonsign is 0, nonsign - 1 wraps around to 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0.
+     * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus
+     *   zero_mask ==
+     *                0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h)
+     *                0x00000000 otherwise
+     */
+    const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31;
+    /*
+     * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal)
+     * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa
+     *    shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number.
+     * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the difference in exponent bias
+     *    (0x7F for single-precision number less 0xF for half-precision number).
+     * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift
+     *    is less than 0x70, this can be combined with step 3.
+     * 5. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero.
+     * 6. Combine with the sign of the input number.
+     */
+    return sign | (((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) & ~zero_mask);
+}
+
+/*
+ * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to
+ * a 32-bit floating-point number in IEEE single-precision format.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline float fp16_alt_to_fp32_value(uint16_t h) {
+    /*
+     * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
+     *      +---+-----+------------+-------------------+
+     *      | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000|
+     *      +---+-----+------------+-------------------+
+     * Bits  31  26-30    16-25            0-15
+     *
+     * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits.
+     */
+    const uint32_t w = (uint32_t) h << 16;
+    /*
+     * Extract the sign of the input number into the high bit of the 32-bit word:
+     *
+     *      +---+----------------------------------+
+     *      | S |0000000 00000000 00000000 00000000|
+     *      +---+----------------------------------+
+     * Bits  31                 0-30
+     */
+    const uint32_t sign = w & UINT32_C(0x80000000);
+    /*
+     * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word:
+     *
+     *      +-----+------------+---------------------+
+     *      |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000|
+     *      +-----+------------+---------------------+
+     * Bits  27-31    17-26            0-16
+     */
+    const uint32_t two_w = w + w;
+
+    /*
+     * Shift exponent and mantissa into bits 23-27 and bits 13-22 so they become exponent and mantissa
+     * of a single-precision floating-point number:
+     *
+     *       S|Exponent |          Mantissa
+     *      +-+---+-----+------------+----------------+
+     *      |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000|
+     *      +-+---+-----+------------+----------------+
+     * Bits   | 23-31   |           0-22
+     *
+     * Next, the exponent is adjusted for the difference in exponent bias between single-precision and half-precision
+     * formats (0x7F - 0xF = 0x70). This operation never overflows or generates non-finite values, as the largest
+     * half-precision exponent is 0x1F and after the adjustment it cannot exceed 0x8F < 0xFE (largest single-precision
+     * exponent for finite values).
+     *
+     * Note that this operation does not handle denormal inputs (where biased exponent == 0). However, it also does not
+     * operate on denormal inputs, and does not produce denormal results.
+     */
+    const uint32_t exp_offset = UINT32_C(0x70) << 23; /* integer bit pattern: added to the shifted bits before the bitcast */
+    const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset);
+
+    /*
+     * Convert denormalized half-precision inputs into single-precision results (always normalized).
+     * Zero inputs are also handled here.
+     *
+     * In a denormalized number the biased exponent is zero, and mantissa has non-zero bits.
+     * First, we shift mantissa into bits 0-9 of the 32-bit word.
+     *
+     *                  zeros           |  mantissa
+     *      +---------------------------+------------+
+     *      |0000 0000 0000 0000 0000 00|MM MMMM MMMM|
+     *      +---------------------------+------------+
+     * Bits             10-31                0-9
+     *
+     * Now, remember that denormalized half-precision numbers are represented as:
+     *    FP16 = mantissa * 2**(-24).
+     * The trick is to construct a normalized single-precision number with the same mantissa as the half-precision input
+     * and with an exponent which would scale the corresponding mantissa bits to 2**(-24).
+     * A normalized single-precision floating-point number is represented as:
+     *    FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127)
+     * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision
+     * number causes a change of the constructed single-precision number by 2**(-24), i.e. the same amount.
+     *
+     * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number
+     * is zero, the constructed single-precision number has the value of
+     *    FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5
+     * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of
+     * the input half-precision number.
+     */
+    const uint32_t magic_mask = UINT32_C(126) << 23;
+    const float magic_bias = 0.5f;
+    const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+
+    /*
+     * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the
+     *   input exponent. The variable two_w contains input exponent in bits 27-31, therefore if it's smaller than 2**27, the
+     *   input is either a denormal number, or zero.
+     * - Combine the result of conversion of exponent and mantissa with the sign of the input number.
+     */
+    const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+    const uint32_t result = sign |
+        (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
+    return fp32_from_bits(result);
+}
+
+/*
+ * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in
+ * ARM alternative half-precision format, in bit representation.
+ *
+ * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals)
+ * floating-point operations and bitcasts between integer and floating-point variables.
+ */
+static inline uint16_t fp16_alt_from_fp32_value(float f) {
+    const uint32_t w = fp32_to_bits(f);
+    const uint32_t sign = w & UINT32_C(0x80000000);
+    const uint32_t shl1_w = w + w;
+
+    const uint32_t shl1_max_fp16_fp32 = UINT32_C(0x8FFFC000);
+    const uint32_t shl1_base = shl1_w > shl1_max_fp16_fp32 ? shl1_max_fp16_fp32 : shl1_w;
+    uint32_t shl1_bias = shl1_base & UINT32_C(0xFF000000);
+    const uint32_t exp_difference = 23 - 10;
+    const uint32_t shl1_bias_min = (127 - 1 - exp_difference) << 24;
+    if (shl1_bias < shl1_bias_min) {
+        shl1_bias = shl1_bias_min;
+    }
+
+    const float bias = fp32_from_bits((shl1_bias >> 1) + ((exp_difference + 2) << 23));
+    const float base = fp32_from_bits((shl1_base >> 1) + (2 << 23)) + bias;
+
+    const uint32_t exp_f = fp32_to_bits(base) >> 13;
+    return (sign >> 16) | ((exp_f & UINT32_C(0x00007C00)) + (fp32_to_bits(base) & UINT32_C(0x00000FFF)));
+}
+
+#endif /* FP16_FP16_H */
diff --git a/Texture2DDecoder/pvrtc.cpp b/Texture2DDecoder/pvrtc.cpp
new file mode 100644
index 0000000..9e4751f
--- /dev/null
+++ b/Texture2DDecoder/pvrtc.cpp
@@ -0,0 +1,248 @@
+#include "pvrtc.h"
+#include <stdint.h>
+#include <string.h>
+#include "color.h"
+#include "endianness.h"
+
+static const int PVRTC1_STANDARD_WEIGHT[] = {0, 3, 5, 8};
+static const int PVRTC1_PUNCHTHROUGH_WEIGHT[] = {0, 4, 4, 8};
+
+static inline long morton_index(const long x, const long y,
const long min_dim) { + long offset = 0, shift = 0; + for (long mask = 1; mask < min_dim; mask <<= 1, shift++) + offset |= (((y & mask) | ((x & mask) << 1))) << shift; + offset |= ((x | y) >> shift) << (shift * 2); + return offset; +} + +static void get_texel_colors(const uint8_t *data, PVRTCTexelInfo *info) { + uint16_t ca = lton16(*(uint16_t *)(data + 4)); + uint16_t cb = lton16(*(uint16_t *)(data + 6)); + if (ca & 0x8000) { + info->a.r = ca >> 10 & 0x1f; + info->a.g = ca >> 5 & 0x1f; + info->a.b = (ca & 0x1e) | (ca >> 4 & 1); + info->a.a = 0xf; + } else { + info->a.r = (ca >> 7 & 0x1e) | (ca >> 11 & 1); + info->a.g = (ca >> 3 & 0x1e) | (ca >> 7 & 1); + info->a.b = (ca << 1 & 0x1c) | (ca >> 2 & 3); + info->a.a = ca >> 11 & 0xe; + } + if (cb & 0x8000) { + info->b.r = cb >> 10 & 0x1f; + info->b.g = cb >> 5 & 0x1f; + info->b.b = cb & 0x1f; + info->b.a = 0xf; + } else { + info->b.r = (cb >> 7 & 0x1e) | (cb >> 11 & 1); + info->b.g = (cb >> 3 & 0x1e) | (cb >> 7 & 1); + info->b.b = (cb << 1 & 0x1e) | (cb >> 3 & 1); + info->b.a = cb >> 11 & 0xe; + } +} + +static void get_texel_weights_4bpp(const uint8_t *data, PVRTCTexelInfo *info) { + info->punch_through_flag = 0; + + int mod_mode = data[4] & 1; + uint32_t mod_bits = lton32(*(uint32_t *)data); + + if (mod_mode) { + for (int i = 0; i < 16; i++, mod_bits >>= 2) { + info->weight[i] = PVRTC1_PUNCHTHROUGH_WEIGHT[mod_bits & 3]; + if ((mod_bits & 3) == 2) + info->punch_through_flag |= 1 << i; + } + } else { + for (int i = 0; i < 16; i++, mod_bits >>= 2) + info->weight[i] = PVRTC1_STANDARD_WEIGHT[mod_bits & 3]; + } +} + +static void get_texel_weights_2bpp(const uint8_t *data, PVRTCTexelInfo *info) { + info->punch_through_flag = 0; + + int mod_mode = data[4] & 1; + uint32_t mod_bits = lton32(*(uint32_t *)data); + + if (mod_mode) { + int fillflag = data[0] & 1 ? (data[2] & 0x10 ? -1 : -2) : -3; + for (int y = 0, i = 1; y < 4; ++y & 1 ? 
--i : ++i) + for (int x = 0; x < 4; x++, i += 2) + info->weight[i] = fillflag; + for (int y = 0, i = 0; y < 4; ++y & 1 ? ++i : --i) + for (int x = 0; x < 4; x++, i += 2, mod_bits >>= 2) + info->weight[i] = PVRTC1_STANDARD_WEIGHT[mod_bits & 3]; + info->weight[0] = (info->weight[0] + 3) & 8; + if (data[0] & 1) + info->weight[20] = (info->weight[20] + 3) & 8; + } else { + for (int i = 0; i < 32; i++, mod_bits >>= 1) + info->weight[i] = mod_bits & 1 ? 8 : 0; + } +} + +static void applicate_color_4bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[32]) { + static const int INTERP_WEIGHT[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}}; + PVRTCTexelColorInt clr_a[16] = {}, clr_b[16] = {}; + + for (int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 4; x++, i++) { + for (int acy = 0, ac = 0; acy < 3; acy++) { + for (int acx = 0; acx < 3; acx++, ac++) { + int interp_weight = INTERP_WEIGHT[x][acx] * INTERP_WEIGHT[y][acy]; + clr_a[i].r += info[ac]->a.r * interp_weight; + clr_a[i].g += info[ac]->a.g * interp_weight; + clr_a[i].b += info[ac]->a.b * interp_weight; + clr_a[i].a += info[ac]->a.a * interp_weight; + clr_b[i].r += info[ac]->b.r * interp_weight; + clr_b[i].g += info[ac]->b.g * interp_weight; + clr_b[i].b += info[ac]->b.b * interp_weight; + clr_b[i].a += info[ac]->b.a * interp_weight; + } + } + clr_a[i].r = (clr_a[i].r >> 1) + (clr_a[i].r >> 6); + clr_a[i].g = (clr_a[i].g >> 1) + (clr_a[i].g >> 6); + clr_a[i].b = (clr_a[i].b >> 1) + (clr_a[i].b >> 6); + clr_a[i].a = (clr_a[i].a) + (clr_a[i].a >> 4); + clr_b[i].r = (clr_b[i].r >> 1) + (clr_b[i].r >> 6); + clr_b[i].g = (clr_b[i].g >> 1) + (clr_b[i].g >> 6); + clr_b[i].b = (clr_b[i].b >> 1) + (clr_b[i].b >> 6); + clr_b[i].a = (clr_b[i].a) + (clr_b[i].a >> 4); + } + } + + const PVRTCTexelInfo *self_info = info[4]; + uint32_t punch_through_flag = self_info->punch_through_flag; + for (int i = 0; i < 16; i++, punch_through_flag >>= 1) { + buf[i] = color((clr_a[i].r * (8 - self_info->weight[i]) + 
clr_b[i].r * self_info->weight[i]) / 8, + (clr_a[i].g * (8 - self_info->weight[i]) + clr_b[i].g * self_info->weight[i]) / 8, + (clr_a[i].b * (8 - self_info->weight[i]) + clr_b[i].b * self_info->weight[i]) / 8, + punch_through_flag & 1 + ? 0 + : (clr_a[i].a * (8 - self_info->weight[i]) + clr_b[i].a * self_info->weight[i]) / 8); + } +} + +static void applicate_color_2bpp(const uint8_t *data, PVRTCTexelInfo *const info[9], uint32_t buf[32]) { + static const int INTERP_WEIGHT_X[8][3] = {{4, 4, 0}, {3, 5, 0}, {2, 6, 0}, {1, 7, 0}, + {0, 8, 0}, {0, 7, 1}, {0, 6, 2}, {0, 5, 3}}; + static const int INTERP_WEIGHT_Y[4][3] = {{2, 2, 0}, {1, 3, 0}, {0, 4, 0}, {0, 3, 1}}; + PVRTCTexelColorInt clr_a[32] = {}, clr_b[32] = {}; + + for (int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 8; x++, i++) { + for (int acy = 0, ac = 0; acy < 3; acy++) { + for (int acx = 0; acx < 3; acx++, ac++) { + int interp_weight = INTERP_WEIGHT_X[x][acx] * INTERP_WEIGHT_Y[y][acy]; + clr_a[i].r += info[ac]->a.r * interp_weight; + clr_a[i].g += info[ac]->a.g * interp_weight; + clr_a[i].b += info[ac]->a.b * interp_weight; + clr_a[i].a += info[ac]->a.a * interp_weight; + clr_b[i].r += info[ac]->b.r * interp_weight; + clr_b[i].g += info[ac]->b.g * interp_weight; + clr_b[i].b += info[ac]->b.b * interp_weight; + clr_b[i].a += info[ac]->b.a * interp_weight; + } + } + clr_a[i].r = (clr_a[i].r >> 2) + (clr_a[i].r >> 7); + clr_a[i].g = (clr_a[i].g >> 2) + (clr_a[i].g >> 7); + clr_a[i].b = (clr_a[i].b >> 2) + (clr_a[i].b >> 7); + clr_a[i].a = (clr_a[i].a >> 1) + (clr_a[i].a >> 5); + clr_b[i].r = (clr_b[i].r >> 2) + (clr_b[i].r >> 7); + clr_b[i].g = (clr_b[i].g >> 2) + (clr_b[i].g >> 7); + clr_b[i].b = (clr_b[i].b >> 2) + (clr_b[i].b >> 7); + clr_b[i].a = (clr_b[i].a >> 1) + (clr_b[i].a >> 5); + } + } + + static const int POSYA[4][2] = {{1, 24}, {4, -8}, {4, -8}, {4, -8}}; + static const int POSYB[4][2] = {{4, 8}, {4, 8}, {4, 8}, {7, -24}}; + static const int POSXL[8][2] = {{3, 7}, {4, -1}, {4, -1}, {4, -1}, 
{4, -1}, {4, -1}, {4, -1}, {4, -1}}; + static const int POSXR[8][2] = {{4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {4, 1}, {5, -7}}; + + PVRTCTexelInfo *self_info = info[4]; + uint32_t punch_through_flag = self_info->punch_through_flag; + for (int y = 0, i = 0; y < 4; y++) { + for (int x = 0; x < 8; x++, i++, punch_through_flag >>= 1) { + switch (self_info->weight[i]) { + case -1: + self_info->weight[i] = + (info[POSYA[y][0]]->weight[i + POSYA[y][1]] + info[POSYB[y][0]]->weight[i + POSYB[y][1]] + 1) / 2; + break; + case -2: + self_info->weight[i] = + (info[POSXL[x][0]]->weight[i + POSXL[x][1]] + info[POSXR[x][0]]->weight[i + POSXR[x][1]] + 1) / 2; + break; + case -3: + self_info->weight[i] = + (info[POSYA[y][0]]->weight[i + POSYA[y][1]] + info[POSYB[y][0]]->weight[i + POSYB[y][1]] + + info[POSXL[x][0]]->weight[i + POSXL[x][1]] + info[POSXR[x][0]]->weight[i + POSXR[x][1]] + 2) / + 4; + break; + } + buf[i] = color((clr_a[i].r * (8 - self_info->weight[i]) + clr_b[i].r * self_info->weight[i]) / 8, + (clr_a[i].g * (8 - self_info->weight[i]) + clr_b[i].g * self_info->weight[i]) / 8, + (clr_a[i].b * (8 - self_info->weight[i]) + clr_b[i].b * self_info->weight[i]) / 8, + punch_through_flag & 1 + ? 0 + : (clr_a[i].a * (8 - self_info->weight[i]) + clr_b[i].a * self_info->weight[i]) / 8); + } + } +} + +int decode_pvrtc(const uint8_t *data, const long w, const long h, uint32_t *image, const int is2bpp) { + long bw = is2bpp ? 8 : 4; + long num_blocks_x = is2bpp ? (w + 7) / 8 : (w + 3) / 4; + long num_blocks_y = (h + 3) / 4; + long num_blocks = num_blocks_x * num_blocks_y; + long min_num_blocks = num_blocks_x <= num_blocks_y ? 
num_blocks_x : num_blocks_y; + + if ((num_blocks_x & (num_blocks_x - 1)) || (num_blocks_y & (num_blocks_y - 1))) { + //extern const char* error_msg; + //error_msg = "the number of blocks of each side must be a power of 2"; + return 0; + } + + PVRTCTexelInfo *texel_info = (PVRTCTexelInfo *)malloc(sizeof(PVRTCTexelInfo) * num_blocks); + if (texel_info == NULL) { + //extern const char* error_msg; + //error_msg = "memory allocation failed"; + return 0; + } + + void (*get_texel_weights_func)(const uint8_t *, PVRTCTexelInfo *) = + is2bpp ? get_texel_weights_2bpp : get_texel_weights_4bpp; + void (*applicate_color_func)(const uint8_t *, PVRTCTexelInfo *const[9], uint32_t[32]) = + is2bpp ? applicate_color_2bpp : applicate_color_4bpp; + + const uint8_t *d = data; + for (long i = 0; i < num_blocks; i++, d += 8) { + get_texel_colors(d, &texel_info[i]); + get_texel_weights_func(d, &texel_info[i]); + } + + uint32_t buffer[32]; + PVRTCTexelInfo *local_info[9]; + long pos_x[3], pos_y[3]; + + for (long by = 0; by < num_blocks_y; by++) { + pos_y[0] = by == 0 ? num_blocks_y - 1 : by - 1; + pos_y[1] = by; + pos_y[2] = by == num_blocks_y - 1 ? 0 : by + 1; + for (long bx = 0, x = 0; bx < num_blocks_x; bx++, x += 4) { + pos_x[0] = bx == 0 ? num_blocks_x - 1 : bx - 1; + pos_x[1] = bx; + pos_x[2] = bx == num_blocks_x - 1 ? 
0 : bx + 1; + for (long cy = 0, c = 0; cy < 3; cy++) + for (long cx = 0; cx < 3; cx++, c++) + local_info[c] = &texel_info[morton_index(pos_x[cx], pos_y[cy], min_num_blocks)]; + applicate_color_func(data + morton_index(bx, by, min_num_blocks) * 8, local_info, buffer); + copy_block_buffer(bx, by, w, h, bw, 4, buffer, image); + } + } + + free(texel_info); + return 1; +} diff --git a/Texture2DDecoder/pvrtc.h b/Texture2DDecoder/pvrtc.h new file mode 100644 index 0000000..f0a7e60 --- /dev/null +++ b/Texture2DDecoder/pvrtc.h @@ -0,0 +1,29 @@ +#ifndef PVRTC_H +#define PVRTC_H + +#include + +typedef struct { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; +} PVRTCTexelColor; + +typedef struct { + int r; + int g; + int b; + int a; +} PVRTCTexelColorInt; + +typedef struct { + PVRTCTexelColor a; + PVRTCTexelColor b; + int8_t weight[32]; + uint32_t punch_through_flag; +} PVRTCTexelInfo; + +int decode_pvrtc(const uint8_t *, const long, const long, uint32_t *, const int); + +#endif /* end of include guard: PVRTC_H */ diff --git a/Texture2DDecoder/unitycrunch.cpp b/Texture2DDecoder/unitycrunch.cpp new file mode 100644 index 0000000..9e6d658 --- /dev/null +++ b/Texture2DDecoder/unitycrunch.cpp @@ -0,0 +1,34 @@ +#include "unitycrunch.h" +#include +#include +#include "unitycrunch/crn_decomp.h" + +bool unity_crunch_unpack_level(const uint8_t* data, uint32_t data_size, uint32_t level_index, void** ret, uint32_t* ret_size) { + unitycrnd::crn_texture_info tex_info; + if (!unitycrnd::crnd_get_texture_info(data, data_size, &tex_info)) + { + return false; + } + + unitycrnd::crnd_unpack_context pContext = unitycrnd::crnd_unpack_begin(data, data_size); + if (!pContext) + { + return false; + } + + const crn_uint32 width = std::max(1U, tex_info.m_width >> level_index); + const crn_uint32 height = std::max(1U, tex_info.m_height >> level_index); + const crn_uint32 blocks_x = std::max(1U, (width + 3) >> 2); + const crn_uint32 blocks_y = std::max(1U, (height + 3) >> 2); + const crn_uint32 
row_pitch = blocks_x * unitycrnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); + const crn_uint32 total_face_size = row_pitch * blocks_y; + *ret = new uint8_t[total_face_size]; + *ret_size = total_face_size; + if (!unitycrnd::crnd_unpack_level(pContext, ret, total_face_size, row_pitch, level_index)) + { + unitycrnd::crnd_unpack_end(pContext); + return false; + } + unitycrnd::crnd_unpack_end(pContext); + return true; +} \ No newline at end of file diff --git a/Texture2DDecoder/unitycrunch.h b/Texture2DDecoder/unitycrunch.h new file mode 100644 index 0000000..dd26ac6 --- /dev/null +++ b/Texture2DDecoder/unitycrunch.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +bool unity_crunch_unpack_level(const uint8_t* data, uint32_t data_size, uint32_t level_index, void** ret, uint32_t* ret_size); \ No newline at end of file diff --git a/Texture2DDecoder/unitycrunch/crn_decomp.h b/Texture2DDecoder/unitycrunch/crn_decomp.h new file mode 100644 index 0000000..e38a143 --- /dev/null +++ b/Texture2DDecoder/unitycrunch/crn_decomp.h @@ -0,0 +1,3821 @@ +// File: crn_decomp.h - Fast CRN->DXTc texture transcoder header file library +// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC +// See Copyright Notice and license at the end of this file. +// +// This single header file contains *all* of the code necessary to unpack .CRN files to raw DXTn bits. +// It does NOT depend on the crn compression library. +// +// Note: This is a single file, stand-alone C++ library which is controlled by the use of the following macro: +// If CRND_INCLUDE_CRND_H is NOT defined, the header is included. +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +#ifndef CRND_INCLUDE_CRND_H +#define CRND_INCLUDE_CRND_H + +// Include crn_defs.h (only to bring in some basic CRN-related types and structures). 
+#include "crn_defs.h" + +#include +#include +#ifdef _WIN32 +#include +#else +#include +#endif +#include +#include // needed for placement new, _msize, _expand + +#define CRND_RESTRICT __restrict + +#ifdef _MSC_VER +#pragma warning(disable : 4127) // warning C4127: conditional expression is constant +#endif + +#ifdef CRND_DEVEL +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x500 +#endif +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef +#define NOMINMAX +#endif +#include "windows.h" // only for IsDebuggerPresent(), DebugBreak(), and OutputDebugStringA() +#endif + +// File: crnd_types.h +namespace unitycrnd { +const crn_uint8 cUINT8_MIN = 0; +const crn_uint8 cUINT8_MAX = 0xFFU; +const uint16 cUINT16_MIN = 0; +const uint16 cUINT16_MAX = 0xFFFFU; +const uint32 cUINT32_MIN = 0; +const uint32 cUINT32_MAX = 0xFFFFFFFFU; + +const int8 cINT8_MIN = -128; +const int8 cINT8_MAX = 127; +const int16 cINT16_MIN = -32768; +const int16 cINT16_MAX = 32767; +const int32 cINT32_MIN = (-2147483647 - 1); +const int32 cINT32_MAX = 2147483647; + +enum eClear { cClear }; + +const uint32 cIntBits = 32U; + +template +struct int_traits { + enum { cMin = unitycrnd::cINT32_MIN, + cMax = unitycrnd::cINT32_MAX, + cSigned = true }; +}; + +template <> +struct int_traits { + enum { cMin = unitycrnd::cINT8_MIN, + cMax = unitycrnd::cINT8_MAX, + cSigned = true }; +}; +template <> +struct int_traits { + enum { cMin = unitycrnd::cINT16_MIN, + cMax = unitycrnd::cINT16_MAX, + cSigned = true }; +}; +template <> +struct int_traits { + enum { cMin = unitycrnd::cINT32_MIN, + cMax = unitycrnd::cINT32_MAX, + cSigned = true }; +}; + +template <> +struct int_traits { + enum { cMin = 0, + cMax = unitycrnd::cUINT8_MAX, + cSigned = false }; +}; +template <> +struct int_traits { + enum { cMin = 0, + cMax = unitycrnd::cUINT16_MAX, + cSigned = false }; +}; +template <> +struct int_traits { + enum { cMin = 0, + cMax = unitycrnd::cUINT32_MAX, + cSigned = false }; +}; + +struct empty_type {}; + +} 
// namespace unitycrnd + +// File: crnd_platform.h +namespace unitycrnd { + +bool crnd_is_debugger_present(); +void crnd_debug_break(); +void crnd_output_debug_string(const char* p); + +// actually in crnd_assert.cpp +void crnd_assert(const char* pExp, const char* pFile, unsigned line); +void crnd_fail(const char* pExp, const char* pFile, unsigned line); + +} // namespace unitycrnd + +// File: crnd_assert.h +namespace unitycrnd { +void crnd_assert(const char* pExp, const char* pFile, unsigned line); + +#ifdef NDEBUG +#define CRND_ASSERT(x) ((void)0) +#undef CRND_ASSERTS_ENABLED +#else +#define CRND_ASSERT(_exp) (void)((!!(_exp)) || (unitycrnd::crnd_assert(#_exp, __FILE__, __LINE__), 0)) +#define CRND_ASSERTS_ENABLED +#endif + +void crnd_trace(const char* pFmt, va_list args); +void crnd_trace(const char* pFmt, ...); + +} // namespace unitycrnd + +// File: crnd_helpers.h +namespace unitycrnd { +namespace helpers { +template +struct rel_ops { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator>(const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } +}; + +template +inline T* construct(T* p) { + return new (static_cast(p)) T; +} + +template +inline T* construct(T* p, const U& init) { + return new (static_cast(p)) T(init); +} + +template +void construct_array(T* p, uint32 n) { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T; +} + +template +void construct_array(T* p, uint32 n, const U& init) { + T* q = p + n; + for (; p != q; ++p) + new (static_cast(p)) T(init); +} + +template +inline void destruct(T* p) { + p->~T(); +} + +template +inline void destruct_array(T* p, uint32 n) { + T* q = p + n; + for (; p != q; ++p) + p->~T(); +} + +} // namespace helpers + +} // namespace unitycrnd + +// File: crnd_traits.h +namespace unitycrnd { +template +struct scalar_type { + enum { cFlag = 
false }; + static inline void construct(T* p) { helpers::construct(p); } + static inline void construct(T* p, const T& init) { helpers::construct(p, init); } + static inline void construct_array(T* p, uint32 n) { helpers::construct_array(p, n); } + static inline void destruct(T* p) { helpers::destruct(p); } + static inline void destruct_array(T* p, uint32 n) { helpers::destruct_array(p, n); } +}; + +template +struct scalar_type { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, uint32 n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T**) {} + static inline void destruct_array(T**, uint32) {} +}; + +#define CRND_DEFINE_BUILT_IN_TYPE(X) \ + template <> \ + struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, uint32 n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X*) {} \ + static inline void destruct_array(X*, uint32) {} \ + }; + +CRND_DEFINE_BUILT_IN_TYPE(bool) +CRND_DEFINE_BUILT_IN_TYPE(char) +CRND_DEFINE_BUILT_IN_TYPE(unsigned char) +CRND_DEFINE_BUILT_IN_TYPE(short) +CRND_DEFINE_BUILT_IN_TYPE(unsigned short) +CRND_DEFINE_BUILT_IN_TYPE(int) +CRND_DEFINE_BUILT_IN_TYPE(unsigned int) +CRND_DEFINE_BUILT_IN_TYPE(long) +CRND_DEFINE_BUILT_IN_TYPE(unsigned long) +CRND_DEFINE_BUILT_IN_TYPE(int64) +CRND_DEFINE_BUILT_IN_TYPE(uint64) +CRND_DEFINE_BUILT_IN_TYPE(float) +CRND_DEFINE_BUILT_IN_TYPE(double) +CRND_DEFINE_BUILT_IN_TYPE(long double) + +#undef CRND_DEFINE_BUILT_IN_TYPE + +// See: http://erdani.org/publications/cuj-2004-06.pdf + +template +struct bitwise_movable { + enum { cFlag = false }; +}; + +// Defines type Q as bitwise movable. 
+#define CRND_DEFINE_BITWISE_MOVABLE(Q) \ + template <> \ + struct bitwise_movable { \ + enum { cFlag = true }; \ + }; + +// From yasli_traits.h: +// Credit goes to Boost; +// also found in the C++ Templates book by Vandevoorde and Josuttis + +typedef char (&yes_t)[1]; +typedef char (&no_t)[2]; + +template +yes_t class_test(int U::*); +template +no_t class_test(...); + +template +struct is_class { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; +}; + +template +struct is_pointer { + enum { value = false }; +}; + +template +struct is_pointer { + enum { value = true }; +}; + +#define CRND_IS_POD(T) __is_pod(T) + +} // namespace unitycrnd + +// File: crnd_mem.h +namespace unitycrnd { +void* crnd_malloc(size_t size, size_t* pActual_size = NULL); +void* crnd_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); +void crnd_free(void* p); +size_t crnd_msize(void* p); + +template +inline T* crnd_new() { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; + + return helpers::construct(p); +} + +template +inline T* crnd_new(const T& init) { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; + + return helpers::construct(p, init); +} + +template +inline T* crnd_new_array(uint32 num) { + if (!num) + num = 1; + + uint8* q = static_cast(crnd_malloc(CRND_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); + if (!q) + return NULL; + + T* p = reinterpret_cast(q + CRND_MIN_ALLOC_ALIGNMENT); + + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; + + helpers::construct_array(p, num); + return p; +} + +template +inline void crnd_delete(T* p) { + if (p) { + helpers::destruct(p); + crnd_free(p); + } +} + +template +inline void crnd_delete_array(T* p) { + if (p) { + const uint32 num = reinterpret_cast(p)[-1]; + CRND_ASSERT(num && (num == ~reinterpret_cast(p)[-2])); + + helpers::destruct_array(p, num); + + crnd_free(reinterpret_cast(p) - CRND_MIN_ALLOC_ALIGNMENT); + } +} + +} // namespace unitycrnd + +// 
File: crnd_math.h +namespace unitycrnd { +namespace math { +const float cNearlyInfinite = 1.0e+37f; + +const float cDegToRad = 0.01745329252f; +const float cRadToDeg = 57.29577951f; + +extern uint32 g_bitmasks[32]; + +// Yes I know these should probably be pass by ref, not val: +// http://www.stepanovpapers.com/notes.pdf +// Just don't use them on non-simple (non built-in) types! +template +inline T minimum(T a, T b) { + return (a < b) ? a : b; +} + +template +inline T minimum(T a, T b, T c) { + return minimum(minimum(a, b), c); +} + +template +inline T maximum(T a, T b) { + return (a > b) ? a : b; +} + +template +inline T maximum(T a, T b, T c) { + return maximum(maximum(a, b), c); +} + +template +inline T clamp(T value, T low, T high) { + return (value < low) ? low : ((value > high) ? high : value); +} + +template +inline T square(T value) { + return value * value; +} + +inline bool is_power_of_2(uint32 x) { + return x && ((x & (x - 1U)) == 0U); +} + +// From "Hackers Delight" +inline int next_pow2(uint32 val) { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; +} + +// Returns the total number of bits needed to encode v. 
+inline uint32 total_bits(uint32 v) { + uint32 l = 0; + while (v > 0U) { + v >>= 1; + l++; + } + return l; +} + +inline uint floor_log2i(uint v) { + uint l = 0; + while (v > 1U) { + v >>= 1; + l++; + } + return l; +} + +inline uint ceil_log2i(uint v) { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + l++; + return l; +} +} +} + +// File: crnd_utils.h +namespace unitycrnd { +namespace utils { +template +inline void zero_object(T& obj) { + memset(&obj, 0, sizeof(obj)); +} + +template +inline void zero_this(T* pObj) { + memset(pObj, 0, sizeof(*pObj)); +} + +template +inline void swap(T& left, T& right) { + T temp(left); + left = right; + right = temp; +} + +inline void invert_buf(void* pBuf, uint32 size) { + uint8* p = static_cast(pBuf); + + const uint32 half_size = size >> 1; + for (uint32 i = 0; i < half_size; i++) + swap(p[i], p[size - 1U - i]); +} + +static inline uint16 swap16(uint16 x) { + return static_cast((x << 8) | (x >> 8)); +} +static inline uint32 swap32(uint32 x) { + return ((x << 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | (x >> 24)); +} + +uint32 compute_max_mips(uint32 width, uint32 height); + +} // namespace utils + +} // namespace unitycrnd + +// File: crnd_vector.h +namespace unitycrnd { +struct elemental_vector { + void* m_p; + uint32 m_size; + uint32 m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint32 num); + + bool increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pRelocate); +}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // warning C4127: conditional expression is constant +#endif + +template +class vector : public helpers::rel_ops > { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() + : m_p(NULL), + m_size(0), + m_capacity(0), + 
m_alloc_failed(false) { + } + + inline vector(const vector& other) + : m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) { + *this = other; + } + + inline vector(uint32 size) + : m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) { + resize(size); + } + + inline ~vector() { + clear(); + } + + // I don't like this. Not at all. But exceptions, or just failing suck worse. + inline bool get_alloc_failed() const { return m_alloc_failed; } + inline void clear_alloc_failed() { m_alloc_failed = false; } + + inline bool assign(const vector& other) { + if (this == &other) + return true; + + if (m_capacity == other.m_size) + resize(0); + else { + clear(); + + if (!increase_capacity(other.m_size, false)) + return false; + } + + if (scalar_type::cFlag) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32 i = other.m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return true; + } + + inline vector& operator=(const vector& other) { + assign(other); + return *this; + } + + inline const T* begin() const { return m_p; } + T* begin() { return m_p; } + + inline const T* end() const { return m_p + m_size; } + T* end() { return m_p + m_size; } + + inline bool empty() const { return !m_size; } + inline uint32 size() const { return m_size; } + inline uint32 capacity() const { return m_capacity; } + + inline const T& operator[](uint32 i) const { + CRND_ASSERT(i < m_size); + return m_p[i]; + } + inline T& operator[](uint32 i) { + CRND_ASSERT(i < m_size); + return m_p[i]; + } + + inline const T& front() const { + CRND_ASSERT(m_size); + return m_p[0]; + } + inline T& front() { + CRND_ASSERT(m_size); + return m_p[0]; + } + + inline const T& back() const { + CRND_ASSERT(m_size); + return m_p[m_size - 1]; + } + inline T& back() { + CRND_ASSERT(m_size); + return m_p[m_size - 1]; + } + + inline void clear() { + if (m_p) { + scalar_type::destruct_array(m_p, 
m_size); + crnd_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + + m_alloc_failed = false; + } + + inline bool reserve(uint32 new_capacity) { + if (!increase_capacity(new_capacity, false)) + return false; + + return true; + } + + inline bool resize(uint32 new_size) { + if (m_size != new_size) { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else { + if (new_size > m_capacity) { + if (!increase_capacity(new_size, new_size == (m_size + 1))) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + inline bool push_back(const T& obj) { + CRND_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) { + if (!increase_capacity(m_size + 1, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void pop_back() { + CRND_ASSERT(m_size); + + if (m_size) { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline void insert(uint32 index, const T* p, uint32 n) { + CRND_ASSERT(index <= m_size); + if (!n) + return; + + const uint32 orig_size = m_size; + resize(m_size + n); + + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + const uint32 num_to_move = orig_size - index; + + for (uint32 i = 0; i < num_to_move; i++) { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + + pSrc = p; + pDst = m_p + index; + + for (uint32 i = 0; i < n; i++) { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + + inline void erase(uint32 start, uint32 n) { + CRND_ASSERT((start + n) <= m_size); + + if (!n) + return; + + const uint32 num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + T* pDst_end = pDst + num_to_move; + const T* pSrc = m_p + start + n; + + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + scalar_type::destruct_array(pDst_end, n); + + m_size -= n; + } + + inline void 
erase(uint32 index) { + erase(index, 1); + } + + inline void erase(T* p) { + CRND_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(p - m_p); + } + + inline bool operator==(const vector& rhs) const { + if (m_size != rhs.m_size) + return false; + else if (m_size) { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint32 i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator<(const vector& rhs) const { + const uint32 min_size = math::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + void swap(vector& other) { + utils::swap(m_p, other.m_p); + utils::swap(m_size, other.m_size); + utils::swap(m_capacity, other.m_capacity); + } + + private: + T* m_p; + uint32 m_size; + uint32 m_capacity; + bool m_alloc_failed; + + template + struct is_vector { + enum { cFlag = false }; + }; + template + struct is_vector > { + enum { cFlag = true }; + }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint32 num) { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) { + helpers::construct(pDst, *pSrc); + pSrc->~T(); + pSrc++; + pDst++; + } + } + + inline bool increase_capacity(uint32 min_new_capacity, bool grow_hint) { + if (!reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + ((scalar_type::cFlag) || (is_vector::cFlag) || (bitwise_movable::cFlag) || CRND_IS_POD(T)) ? 
NULL : object_mover)) { + m_alloc_failed = true; + return false; + } + return true; + } +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +extern void vector_test(); + +} // namespace unitycrnd + +// File: crnd_private.h +namespace unitycrnd { +const crn_header* crnd_get_header(const void* pData, uint32 data_size); + +} // namespace unitycrnd + +// File: checksum.h +namespace unitycrnd { +// crc16() intended for small buffers - doesn't use an acceleration table. +const uint16 cInitCRC16 = 0; +uint16 crc16(const void* pBuf, uint32 len, uint16 crc = cInitCRC16); + +} // namespace unitycrnd + +// File: crnd_color.h +namespace unitycrnd { +template +struct color_quad_component_traits { + enum { + cSigned = false, + cFloat = false, + cMin = cUINT8_MIN, + cMax = cUINT8_MAX + }; +}; + +template <> +struct color_quad_component_traits { + enum { + cSigned = true, + cFloat = false, + cMin = cINT16_MIN, + cMax = cINT16_MAX + }; +}; + +template <> +struct color_quad_component_traits { + enum { + cSigned = false, + cFloat = false, + cMin = cUINT16_MIN, + cMax = cUINT16_MAX + }; +}; + +template <> +struct color_quad_component_traits { + enum { + cSigned = true, + cFloat = false, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; +}; + +template <> +struct color_quad_component_traits { + enum { + cSigned = false, + cFloat = false, + cMin = cUINT32_MIN, + cMax = cUINT32_MAX + }; +}; + +template <> +struct color_quad_component_traits { + enum { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; +}; + +template <> +struct color_quad_component_traits { + enum { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; +}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4201) // warning C4201: nonstandard extension used : nameless struct/union +#pragma warning(disable : 4127) // warning C4127: conditional expression is constant +#endif + +template +class color_quad : public helpers::rel_ops > { + static 
parameter_type clamp(parameter_type v) { + if (component_traits::cFloat) + return v; + else { + if (v < component_traits::cMin) + return component_traits::cMin; + else if (v > component_traits::cMax) + return component_traits::cMax; + return v; + } + } + + public: + typedef component_type component_t; + typedef parameter_type parameter_t; + typedef color_quad_component_traits component_traits; + + enum { cNumComps = 4 }; + + union { + struct + { + component_type r; + component_type g; + component_type b; + component_type a; + }; + + component_type c[cNumComps]; + }; + + inline color_quad() { + } + + inline color_quad(eClear) + : r(0), g(0), b(0), a(0) { + } + + inline color_quad(const color_quad& other) + : r(other.r), g(other.g), b(other.b), a(other.a) { + } + + inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) { + set(y, alpha); + } + + inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { + set(red, green, blue, alpha); + } + + template + inline color_quad(const color_quad& other) + : r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) { + } + + inline void clear() { + r = 0; + g = 0; + b = 0; + a = 0; + } + + inline color_quad& operator=(const color_quad& other) { + r = other.r; + g = other.g; + b = other.b; + a = other.a; + return *this; + } + + template + inline color_quad& operator=(const color_quad& other) { + r = clamp(other.r); + g = clamp(other.g); + b = clamp(other.b); + a = clamp(other.a); + return *this; + } + + inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { + y = clamp(y); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { + r = static_cast(clamp(red)); + g = 
static_cast(clamp(green)); + b = static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } + + inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } + + static inline parameter_type get_min_comp() { return component_traits::cMin; } + static inline parameter_type get_max_comp() { return component_traits::cMax; } + static inline bool get_comps_are_signed() { return component_traits::cSigned; } + + inline component_type operator[](uint32 i) const { + CRND_ASSERT(i < cNumComps); + return c[i]; + } + inline component_type& operator[](uint32 i) { + CRND_ASSERT(i < cNumComps); + return c[i]; + } + + inline color_quad& set_component(uint32 i, parameter_type f) { + CRND_ASSERT(i < cNumComps); + + c[i] = static_cast(clamp(f)); + + return *this; + } + + inline color_quad& clamp(const color_quad& l, const color_quad& h) { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l[i], h[i])); + return *this; + } + + inline color_quad& clamp(parameter_type l, parameter_type h) { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l, h)); + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_type get_luma() const { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768) >> 16U); + } + + // Returns REC 709 luma. 
+ inline parameter_type get_luma_rec709() const { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + inline uint32 squared_distance(const color_quad& c, bool alpha = true) const { + return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? math::square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad& rhs) const { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + inline bool operator==(const color_quad& rhs) const { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); + } + + inline bool operator<(const color_quad& rhs) const { + for (uint32 i = 0; i < cNumComps; i++) { + if (c[i] < rhs.c[i]) + return true; + else if (!(c[i] == rhs.c[i])) + return false; + } + return false; + } + + inline color_quad& operator+=(const color_quad& other) { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] + other.c[i])); + return *this; + } + + inline color_quad& operator-=(const color_quad& other) { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] - other.c[i])); + return *this; + } + + inline color_quad& operator*=(parameter_type v) { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] * v)); + return *this; + } + + inline color_quad& operator/=(parameter_type v) { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(c[i] / v); + return *this; + } + + inline color_quad get_swizzled(uint32 x, uint32 y, uint32 z, uint32 w) const { + CRND_ASSERT((x | y | z | w) < 4); + return color_quad(c[x], c[y], c[z], c[w]); + } + + inline friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) { + color_quad result(lhs); + result += rhs; + return result; + } + + inline friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) { + color_quad result(lhs); + result -= rhs; + return result; + } + + inline friend color_quad operator*(const color_quad& lhs, parameter_type v) { + color_quad result(lhs); + result *= v; + 
return result; + } + + friend inline color_quad operator/(const color_quad& lhs, parameter_type v) { + color_quad result(lhs); + result /= v; + return result; + } + + friend inline color_quad operator*(parameter_type v, const color_quad& rhs) { + color_quad result(rhs); + result *= v; + return result; + } + + inline uint32 get_min_component_index(bool alpha = true) const { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] < c[index]) + index = i; + return index; + } + + inline uint32 get_max_component_index(bool alpha = true) const { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] > c[index]) + index = i; + return index; + } + + inline void get_float4(float* pDst) { + for (uint32 i = 0; i < 4; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + inline void get_float3(float* pDst) { + for (uint32 i = 0; i < 3; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + static inline color_quad make_black() { + return color_quad(0, 0, 0, component_traits::cMax); + } + + static inline color_quad make_white() { + return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); + } +}; // class color_quad + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +template +struct scalar_type > { + enum { cFlag = true }; + static inline void construct(color_quad* p) {} + static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } + static inline void construct_array(color_quad* p, uint32 n) { p, n; } + static inline void destruct(color_quad* p) { p; } + static inline void destruct_array(color_quad* p, uint32 n) { p, n; } +}; + +typedef color_quad color_quad_u8; +typedef color_quad color_quad_i16; +typedef 
color_quad color_quad_u16; +typedef color_quad color_quad_i32; +typedef color_quad color_quad_u32; +typedef color_quad color_quad_f; +typedef color_quad color_quad_d; + +} // namespace unitycrnd + +// File: crnd_dxt.h +namespace unitycrnd { +enum dxt_format { + cDXTInvalid = -1, + + // cDXT1/1A must appear first! + cDXT1, + cDXT1A, + + cDXT3, + cDXT5, + cDXT5A, + + cDXN_XY, // inverted relative to standard ATI2, 360's DXN + cDXN_YX // standard ATI2 +}; + +enum dxt_constants { + cDXTBlockShift = 2U, + cDXTBlockSize = 1U << cDXTBlockShift, + + cDXT1BytesPerBlock = 8U, + cDXT5NBytesPerBlock = 16U, + + cDXT1SelectorBits = 2U, + cDXT1SelectorValues = 1U << cDXT1SelectorBits, + cDXT1SelectorMask = cDXT1SelectorValues - 1U, + + cDXT5SelectorBits = 3U, + cDXT5SelectorValues = 1U << cDXT5SelectorBits, + cDXT5SelectorMask = cDXT5SelectorValues - 1U +}; + +const float cDXT1MaxLinearValue = 3.0f; +const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; + +const float cDXT5MaxLinearValue = 7.0f; +const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; + +// Converts DXT1 raw color selector index to a linear value. +extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; + +// Converts DXT5 raw alpha selector index to a linear value. +extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; + +// Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). +extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; + +// Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). 
+extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; + +extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; +extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; + +struct dxt1_block { + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() { + utils::zero_this(this); + } + + // These methods assume the in-memory rep is in LE byte order. + inline uint32 get_low_color() const { + return m_low_color[0] | (m_low_color[1] << 8U); + } + + inline uint32 get_high_color() const { + return m_high_color[0] | (m_high_color[1] << 8U); + } + + inline void set_low_color(uint16 c) { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline void set_high_color(uint16 c) { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline uint32 get_selector(uint32 x, uint32 y) const { + CRND_ASSERT((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; + } + + inline void set_selector(uint32 x, uint32 y, uint32 val) { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); + + m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); + m_selectors[y] |= (val << (x * cDXT1SelectorBits)); + } + + static uint16 pack_color(const color_quad_u8& color, bool scaled, uint32 bias = 127U); + static uint16 pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias = 127U); + + static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint32 alpha = 255U); + static void unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled); + + static uint32 get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint32 get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); + // pDst must point to an array at least cDXT1SelectorValues long. 
+ static uint32 get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static color_quad_u8 unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha = 255U); + static uint32 pack_endpoints(uint32 lo, uint32 hi); +}; + +CRND_DEFINE_BITWISE_MOVABLE(dxt1_block); + +struct dxt3_block { + enum { cNumAlphaBytes = 8 }; + uint8 m_alpha[cNumAlphaBytes]; + + void set_alpha(uint32 x, uint32 y, uint32 value, bool scaled); + uint32 get_alpha(uint32 x, uint32 y, bool scaled) const; +}; + +CRND_DEFINE_BITWISE_MOVABLE(dxt3_block); + +struct dxt5_block { + uint8 m_endpoints[2]; + + enum { cNumSelectorBytes = 6 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() { + utils::zero_this(this); + } + + inline uint32 get_low_alpha() const { + return m_endpoints[0]; + } + + inline uint32 get_high_alpha() const { + return m_endpoints[1]; + } + + inline void set_low_alpha(uint32 i) { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[0] = static_cast(i); + } + + inline void set_high_alpha(uint32 i) { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[1] = static_cast(i); + } + + uint32 get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } + + uint32 get_selectors_as_word(uint32 index) { + CRND_ASSERT(index < 3); + return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); + } + + inline uint32 get_selector(uint32 x, uint32 y) const { + CRND_ASSERT((x < 4U) && (y < 4U)); + + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * cDXT5SelectorBits; + + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; + + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + return (v >> bit_ofs) & 7; + } + + inline void set_selector(uint32 x, uint32 y, uint32 val) { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); + + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * cDXT5SelectorBits; + + uint32 
byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; + + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); + + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cNumSelectorBytes - 1)) + m_selectors[byte_index + 1] = static_cast(v >> 8); + } + + // Results written to alpha channel. + static uint32 get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values(color_quad_u8* pDst, uint32 l, uint32 h); + + static uint32 get_block_values6(uint32* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(uint32* pDst, uint32 l, uint32 h); + // pDst must point to an array at least cDXT5SelectorValues long. + static uint32 get_block_values(uint32* pDst, uint32 l, uint32 h); + + static uint32 unpack_endpoint(uint32 packed, uint32 index); + static uint32 pack_endpoints(uint32 lo, uint32 hi); +}; + +CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); + +} // namespace unitycrnd + +// File: crnd_prefix_coding.h +#ifdef _XBOX +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 +#else +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 0 +#endif + +namespace unitycrnd { +namespace prefix_coding { +const uint32 cMaxExpectedCodeSize = 16; +const uint32 cMaxSupportedSyms = 8192; +const uint32 cMaxTableBits = 11; + +class decoder_tables { + public: + inline decoder_tables() + : m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { + } + + inline decoder_tables(const decoder_tables& other) + : m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { + *this = other; + } + + decoder_tables& operator=(const decoder_tables& other) { + if (this == &other) + return *this; + + clear(); + + memcpy(this, &other, sizeof(*this)); + + if (other.m_lookup) { + 
m_lookup = crnd_new_array(m_cur_lookup_size); + if (m_lookup) + memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } + + if (other.m_sorted_symbol_order) { + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if (m_sorted_symbol_order) + memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + + return *this; + } + + inline void clear() { + if (m_lookup) { + crnd_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) { + crnd_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } + + inline ~decoder_tables() { + if (m_lookup) + crnd_delete_array(m_lookup); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + } + + bool init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits); + + // DO NOT use any complex classes here - it is bitwise copied. 
+ + uint32 m_num_syms; + uint32 m_total_used_syms; + uint32 m_table_bits; + uint32 m_table_shift; + uint32 m_table_max_code; + uint32 m_decode_start_code_size; + + uint8 m_min_code_size; + uint8 m_max_code_size; + + uint32 m_max_codes[cMaxExpectedCodeSize + 1]; + int32 m_val_ptrs[cMaxExpectedCodeSize + 1]; + + uint32 m_cur_lookup_size; + uint32* m_lookup; + + uint32 m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; + + inline uint32 get_unshifted_max_code(uint32 len) const { + CRND_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); + uint32 k = m_max_codes[len - 1]; + if (!k) + return unitycrnd::cUINT32_MAX; + return (k - 1) >> (16 - len); + } +}; + +} // namespace prefix_coding + +} // namespace unitycrnd + +// File: crnd_symbol_codec.h +namespace unitycrnd { +class static_huffman_data_model { + public: + static_huffman_data_model(); + static_huffman_data_model(const static_huffman_data_model& other); + ~static_huffman_data_model(); + + static_huffman_data_model& operator=(const static_huffman_data_model& rhs); + + bool init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit); + void clear(); + + inline bool is_valid() const { return m_pDecode_tables != NULL; } + + inline uint32 get_total_syms() const { return m_total_syms; } + + inline uint32 get_code_size(uint32 sym) const { return m_code_sizes[sym]; } + + inline const uint8* get_code_sizes() const { return m_code_sizes.empty() ? 
NULL : &m_code_sizes[0]; } + + public: + uint32 m_total_syms; + unitycrnd::vector m_code_sizes; + prefix_coding::decoder_tables* m_pDecode_tables; + + private: + bool prepare_decoder_tables(); + uint compute_decoder_table_bits() const; + + friend class symbol_codec; +}; + +class symbol_codec { + public: + symbol_codec(); + + bool start_decoding(const uint8* pBuf, uint32 buf_size); + bool decode_receive_static_data_model(static_huffman_data_model& model); + + uint32 decode_bits(uint32 num_bits); + uint32 decode(const static_huffman_data_model& model); + + uint64 stop_decoding(); + + public: + const uint8* m_pDecode_buf; + const uint8* m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + uint32 m_decode_buf_size; + + typedef uint32 bit_buf_type; + enum { cBitBufSize = 32U }; + bit_buf_type m_bit_buf; + + int m_bit_count; + + private: + void get_bits_init(); + uint32 get_bits(uint32 num_bits); +}; + +} // namespace unitycrnd + +namespace unitycrnd { +void crnd_assert(const char* pExp, const char* pFile, unsigned line) { + char buf[512]; + +#if defined(_WIN32) && defined(_MSC_VER) + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); +#else + sprintf(buf, "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); +#endif + + crnd_output_debug_string(buf); + + puts(buf); + + if (crnd_is_debugger_present()) + crnd_debug_break(); +} + +void crnd_trace(const char* pFmt, va_list args) { + if (crnd_is_debugger_present()) { + char buf[512]; +#if defined(_WIN32) && defined(_MSC_VER) + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsprintf(buf, pFmt, args); +#endif + + crnd_output_debug_string(buf); + } +}; + +void crnd_trace(const char* pFmt, ...) { + va_list args; + va_start(args, pFmt); + crnd_trace(pFmt, args); + va_end(args); +}; + +} // namespace unitycrnd + +// File: checksum.cpp +// From the public domain stb.h header. 
+namespace unitycrnd { +uint16 crc16(const void* pBuf, uint32 len, uint16 crc) { + crc = ~crc; + + const uint8* p = reinterpret_cast(pBuf); + while (len) { + const uint16 q = *p++ ^ (crc >> 8U); + crc <<= 8U; + + uint16 r = (q >> 4U) ^ q; + crc ^= r; + r <<= 5U; + crc ^= r; + r <<= 7U; + crc ^= r; + + len--; + } + + return static_cast(~crc); +} + +} // namespace unitycrnd + +// File: crnd_vector.cpp +namespace unitycrnd { +bool elemental_vector::increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pMover) { + CRND_ASSERT(m_size <= m_capacity); + CRND_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); + + if (m_capacity >= min_new_capacity) + return true; + + uint32 new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2(new_capacity))) + new_capacity = math::next_pow2(new_capacity); + + CRND_ASSERT(new_capacity && (new_capacity > m_capacity)); + + const uint32 desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) { + void* new_p = crnd_realloc(m_p, desired_size, &actual_size, true); + if (!new_p) + return false; + m_p = new_p; + } else { + void* new_p = crnd_malloc(desired_size, &actual_size); + if (!new_p) + return false; + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + crnd_free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = new_capacity; + + return true; +} + +} // namespace unitycrnd + +// File: crnd_utils.cpp +namespace unitycrnd { +namespace utils { +uint32 compute_max_mips(uint32 width, uint32 height) { + if ((width | height) == 0) + return 0; + + uint32 num_mips = 1; + + while ((width > 1U) || (height > 1U)) { + width >>= 1U; + height >>= 1U; + num_mips++; + } + + return num_mips; +} + +} // namespace utils + +} // namespace unitycrnd + +// File: crnd_prefix_coding.cpp +namespace unitycrnd { +namespace prefix_coding { +bool decoder_tables::init(uint32 num_syms, const 
uint8* pCodesizes, uint32 table_bits) { + uint32 min_codes[cMaxExpectedCodeSize]; + if ((!num_syms) || (table_bits > cMaxTableBits)) + return false; + + m_num_syms = num_syms; + + uint32 num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint32 i = 0; i < num_syms; i++) { + uint32 c = pCodesizes[i]; + if (c) + num_codes[c]++; + } + + uint32 sorted_positions[cMaxExpectedCodeSize + 1]; + + uint32 cur_code = 0; + + uint32 total_used_syms = 0; + uint32 max_code_size = 0; + uint32 min_code_size = cUINT32_MAX; + for (uint32 i = 1; i <= cMaxExpectedCodeSize; i++) { + const uint32 n = num_codes[i]; + + if (!n) + m_max_codes[i - 1] = 0; //UINT_MAX; + else { + min_code_size = math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, i); + + min_codes[i - 1] = cur_code; + + m_max_codes[i - 1] = cur_code + n - 1; + m_max_codes[i - 1] = 1 + ((m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); + + m_val_ptrs[i - 1] = total_used_syms; + + sorted_positions[i] = total_used_syms; + + cur_code += n; + total_used_syms += n; + } + + cur_code <<= 1; + } + + m_total_used_syms = total_used_syms; + + if (total_used_syms > m_cur_sorted_symbol_order_size) { + m_cur_sorted_symbol_order_size = total_used_syms; + + if (!math::is_power_of_2(total_used_syms)) + m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if (!m_sorted_symbol_order) + return false; + } + + m_min_code_size = static_cast(min_code_size); + m_max_code_size = static_cast(max_code_size); + + for (uint32 i = 0; i < num_syms; i++) { + uint32 c = pCodesizes[i]; + if (c) { + CRND_ASSERT(num_codes[c]); + + uint32 sorted_pos = sorted_positions[c]++; + + CRND_ASSERT(sorted_pos < total_used_syms); + + m_sorted_symbol_order[sorted_pos] = static_cast(i); + } + } + + if (table_bits <= 
m_min_code_size) + table_bits = 0; + m_table_bits = table_bits; + + if (table_bits) { + uint32 table_size = 1 << table_bits; + if (table_size > m_cur_lookup_size) { + m_cur_lookup_size = table_size; + + if (m_lookup) + crnd_delete_array(m_lookup); + + m_lookup = crnd_new_array(table_size); + if (!m_lookup) + return false; + } + + memset(m_lookup, 0xFF, (uint)sizeof(m_lookup[0]) * (1UL << table_bits)); + + for (uint32 codesize = 1; codesize <= table_bits; codesize++) { + if (!num_codes[codesize]) + continue; + + const uint32 fillsize = table_bits - codesize; + const uint32 fillnum = 1 << fillsize; + + const uint32 min_code = min_codes[codesize - 1]; + const uint32 max_code = get_unshifted_max_code(codesize); + const uint32 val_ptr = m_val_ptrs[codesize - 1]; + + for (uint32 code = min_code; code <= max_code; code++) { + const uint32 sym_index = m_sorted_symbol_order[val_ptr + code - min_code]; + CRND_ASSERT(pCodesizes[sym_index] == codesize); + + for (uint32 j = 0; j < fillnum; j++) { + const uint32 t = j + (code << fillsize); + + CRND_ASSERT(t < (1U << table_bits)); + + CRND_ASSERT(m_lookup[t] == cUINT32_MAX); + + m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } + + for (uint32 i = 0; i < cMaxExpectedCodeSize; i++) + m_val_ptrs[i] -= min_codes[i]; + + m_table_max_code = 0; + m_decode_start_code_size = m_min_code_size; + + if (table_bits) { + uint32 i; + for (i = table_bits; i >= 1; i--) { + if (num_codes[i]) { + m_table_max_code = m_max_codes[i - 1]; + break; + } + } + if (i >= 1) { + m_decode_start_code_size = table_bits + 1; + for (uint32 j = table_bits + 1; j <= max_code_size; j++) { + if (num_codes[j]) { + m_decode_start_code_size = j; + break; + } + } + } + } + + // sentinels + m_max_codes[cMaxExpectedCodeSize] = cUINT32_MAX; + m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + + m_table_shift = 32 - m_table_bits; + return true; +} + +} // namespace prefix_codig + +} // namespace unitycrnd + +// File: crnd_platform.cpp +namespace unitycrnd { +bool 
crnd_is_debugger_present() { +#ifdef CRND_DEVEL + return IsDebuggerPresent() != 0; +#else + return false; +#endif +} + +void crnd_debug_break() { +#ifdef CRND_DEVEL + DebugBreak(); +#endif +} + +void crnd_output_debug_string(const char* p) { + (void)p; +#ifdef CRND_DEVEL + OutputDebugStringA(p); +#endif +} + +} // namespace unitycrnd + +// File: crnd_mem.cpp +namespace unitycrnd { +const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; + +static void* crnd_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) { + void* p_new; + + if (!p) { + p_new = ::malloc(size); + + if (pActual_size) { +#ifdef _WIN32 + *pActual_size = p_new ? ::_msize(p_new) : 0; +#else + *pActual_size = p_new ? malloc_usable_size(p_new) : 0; +#endif + } + } else if (!size) { + ::free(p); + p_new = NULL; + + if (pActual_size) + *pActual_size = 0; + } else { + void* p_final_block = p; +#ifdef _WIN32 + p_new = ::_expand(p, size); +#else + p_new = NULL; +#endif + + if (p_new) + p_final_block = p_new; + else if (movable) { + p_new = ::realloc(p, size); + + if (p_new) + p_final_block = p_new; + } + + if (pActual_size) { +#ifdef _WIN32 + *pActual_size = ::_msize(p_final_block); +#else + *pActual_size = ::malloc_usable_size(p_final_block); +#endif + } + } + + return p_new; +} + +static size_t crnd_default_msize(void* p, void* pUser_data) { + pUser_data; +#ifdef _WIN32 + return p ? _msize(p) : 0; +#else + return p ? 
malloc_usable_size(p) : 0; +#endif +} + +static crnd_realloc_func g_pRealloc = crnd_default_realloc; +static crnd_msize_func g_pMSize = crnd_default_msize; +static void* g_pUser_data; + +void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data) { + if ((!pRealloc) || (!pMSize)) { + g_pRealloc = crnd_default_realloc; + g_pMSize = crnd_default_msize; + g_pUser_data = NULL; + } else { + g_pRealloc = pRealloc; + g_pMSize = pMSize; + g_pUser_data = pUser_data; + } +} + +static inline void crnd_mem_error(const char* p_msg) { + crnd_assert(p_msg, __FILE__, __LINE__); +} + +void* crnd_malloc(size_t size, size_t* pActual_size) { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + size = sizeof(uint32); + + if (size > MAX_POSSIBLE_BLOCK_SIZE) { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); + + if (pActual_size) + *pActual_size = actual_size; + + if ((!p_new) || (actual_size < size)) { + crnd_mem_error("crnd_malloc: out of memory"); + return NULL; + } + + CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + return p_new; +} + +void* crnd_realloc(void* p, size_t size, size_t* pActual_size, bool movable) { + if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { + crnd_mem_error("crnd_realloc: bad ptr"); + return NULL; + } + + if (size > MAX_POSSIBLE_BLOCK_SIZE) { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + + if (pActual_size) + *pActual_size = actual_size; + + CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + return p_new; +} + +void crnd_free(void* p) { + if (!p) + return; + + if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { + 
crnd_mem_error("crnd_free: bad ptr"); + return; + } + + (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); +} + +size_t crnd_msize(void* p) { + if (!p) + return 0; + + if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { + crnd_mem_error("crnd_msize: bad ptr"); + return 0; + } + + return (*g_pMSize)(p, g_pUser_data); +} + +} // namespace unitycrnd + +// File: crnd_math.cpp +namespace unitycrnd { +namespace math { +uint32 g_bitmasks[32] = + { + 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, + 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, + 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, + 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, + 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, + 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, + 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, + 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U}; + +} // namespace math +} // namespace unitycrnd + +// File: crnd_info.cpp +namespace unitycrnd { +#define CRND_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) + +uint32 crnd_crn_format_to_fourcc(crn_format fmt) { + switch (fmt) { + case cCRNFmtDXT1: + return CRND_FOURCC('D', 'X', 'T', '1'); + case cCRNFmtDXT3: + return CRND_FOURCC('D', 'X', 'T', '3'); + case cCRNFmtDXT5: + return CRND_FOURCC('D', 'X', 'T', '5'); + case cCRNFmtDXN_XY: + return CRND_FOURCC('A', '2', 'X', 'Y'); + case cCRNFmtDXN_YX: + return CRND_FOURCC('A', 'T', 'I', '2'); + case cCRNFmtDXT5A: + return CRND_FOURCC('A', 'T', 'I', '1'); + case cCRNFmtDXT5_CCxY: + return CRND_FOURCC('C', 'C', 'x', 'Y'); + case cCRNFmtDXT5_xGxR: + return CRND_FOURCC('x', 'G', 'x', 'R'); + case cCRNFmtDXT5_xGBR: + return CRND_FOURCC('x', 'G', 'B', 'R'); + case cCRNFmtDXT5_AGBR: + return CRND_FOURCC('A', 'G', 'B', 'R'); + case cCRNFmtETC1: + return CRND_FOURCC('E', 'T', 'C', '1'); + case cCRNFmtETC2: + return CRND_FOURCC('E', 'T', 'C', '2'); + case cCRNFmtETC2A: + return CRND_FOURCC('E', 'T', '2', 'A'); + case cCRNFmtETC1S: + return CRND_FOURCC('E', 'T', '1', 'S'); + case cCRNFmtETC2AS: + return 
CRND_FOURCC('E', '2', 'A', 'S'); + default: + break; + } + CRND_ASSERT(false); + return 0; +} + +crn_format crnd_get_fundamental_dxt_format(crn_format fmt) { + switch (fmt) { + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return cCRNFmtDXT5; + default: + break; + } + return fmt; +} + +uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt) { + switch (fmt) { + case cCRNFmtDXT1: + case cCRNFmtDXT5A: + case cCRNFmtETC1: + case cCRNFmtETC2: + case cCRNFmtETC1S: + return 4; + case cCRNFmtDXT3: + case cCRNFmtDXT5: + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtETC2A: + case cCRNFmtETC2AS: + return 8; + default: + break; + } + CRND_ASSERT(false); + return 0; +} + +uint32 crnd_get_bytes_per_dxt_block(crn_format fmt) { + return (crnd_get_crn_format_bits_per_texel(fmt) << 4) >> 3; +} + +// TODO: tmp_header isn't used/This function is a helper to support old headers. 
+const crn_header* crnd_get_header(const void* pData, uint32 data_size) { + if ((!pData) || (data_size < sizeof(crn_header))) + return NULL; + + const crn_header& file_header = *static_cast(pData); + if (file_header.m_sig != crn_header::cCRNSigValue) + return NULL; + + if ((file_header.m_header_size < sizeof(crn_header)) || (data_size < file_header.m_data_size)) + return NULL; + + return &file_header; +} + +bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info) { + if (pFile_info) { + if (pFile_info->m_struct_size != sizeof(crn_file_info)) + return false; + + memset(&pFile_info->m_struct_size + 1, 0, sizeof(crn_file_info) - sizeof(pFile_info->m_struct_size)); + } + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + const uint32 header_crc = crc16(&pHeader->m_data_size, (uint32)(pHeader->m_header_size - ((const uint8*)&pHeader->m_data_size - (const uint8*)pHeader))); + if (header_crc != pHeader->m_header_crc16) + return false; + + const uint32 data_crc = crc16((const uint8*)pData + pHeader->m_header_size, pHeader->m_data_size - pHeader->m_header_size); + if (data_crc != pHeader->m_data_crc16) + return false; + + if ((pHeader->m_faces != 1) && (pHeader->m_faces != 6)) + return false; + if ((pHeader->m_width < 1) || (pHeader->m_width > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_height < 1) || (pHeader->m_height > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_levels < 1) || (pHeader->m_levels > utils::compute_max_mips(pHeader->m_width, pHeader->m_height))) + return false; + if (((int)pHeader->m_format < cCRNFmtDXT1) || ((int)pHeader->m_format >= cCRNFmtTotal)) + return false; + + if (pFile_info) { + pFile_info->m_actual_data_size = pHeader->m_data_size; + pFile_info->m_header_size = pHeader->m_header_size; + pFile_info->m_total_palette_size = pHeader->m_color_endpoints.m_size + 
pHeader->m_color_selectors.m_size + pHeader->m_alpha_endpoints.m_size + pHeader->m_alpha_selectors.m_size; + pFile_info->m_tables_size = pHeader->m_tables_size; + + pFile_info->m_levels = pHeader->m_levels; + + for (uint32 i = 0; i < pHeader->m_levels; i++) { + uint32 next_ofs = pHeader->m_data_size; + + // assumes the levels are packed together sequentially + if ((i + 1) < pHeader->m_levels) + next_ofs = pHeader->m_level_ofs[i + 1]; + + pFile_info->m_level_compressed_size[i] = next_ofs - pHeader->m_level_ofs[i]; + } + + pFile_info->m_color_endpoint_palette_entries = pHeader->m_color_endpoints.m_num; + pFile_info->m_color_selector_palette_entries = pHeader->m_color_selectors.m_num; + ; + pFile_info->m_alpha_endpoint_palette_entries = pHeader->m_alpha_endpoints.m_num; + ; + pFile_info->m_alpha_selector_palette_entries = pHeader->m_alpha_selectors.m_num; + ; + } + + return true; +} + +bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pInfo) { + if ((!pData) || (data_size < sizeof(crn_header)) || (!pInfo)) + return false; + + if (pInfo->m_struct_size != sizeof(crn_texture_info)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + pInfo->m_width = pHeader->m_width; + pInfo->m_height = pHeader->m_height; + pInfo->m_levels = pHeader->m_levels; + pInfo->m_faces = pHeader->m_faces; + pInfo->m_format = static_cast((uint32)pHeader->m_format); + pInfo->m_bytes_per_block = pHeader->m_format == cCRNFmtDXT1 || pHeader->m_format == cCRNFmtDXT5A || pHeader->m_format == cCRNFmtETC1 || pHeader->m_format == cCRNFmtETC2 || pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; + pInfo->m_userdata0 = pHeader->m_userdata0; + pInfo->m_userdata1 = pHeader->m_userdata1; + + return true; +} + +bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info) { + if ((!pData) || (data_size < cCRNHeaderMinSize) || (!pLevel_info)) + return false; + + if (pLevel_info->m_struct_size != sizeof(crn_level_info)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + if (level_index >= pHeader->m_levels) + return false; + + uint32 width = math::maximum(1U, pHeader->m_width >> level_index); + uint32 height = math::maximum(1U, pHeader->m_height >> level_index); + + pLevel_info->m_width = width; + pLevel_info->m_height = height; + pLevel_info->m_faces = pHeader->m_faces; + pLevel_info->m_blocks_x = (width + 3) >> 2; + pLevel_info->m_blocks_y = (height + 3) >> 2; + pLevel_info->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; + pLevel_info->m_format = static_cast((uint32)pHeader->m_format); + + return true; +} + +const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize) { + if (pSize) + *pSize = 0; + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return NULL; + + if (level_index >= pHeader->m_levels) + return NULL; + + uint32 cur_level_ofs = pHeader->m_level_ofs[level_index]; + + if (pSize) { + uint32 next_level_ofs = data_size; + if ((level_index + 1) < (pHeader->m_levels)) + next_level_ofs = pHeader->m_level_ofs[level_index + 1]; + + *pSize = next_level_ofs - cur_level_ofs; + } + + return static_cast(pData) + cur_level_ofs; +} + +uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + uint32 size = pHeader->m_header_size; + + size = math::maximum(size, pHeader->m_color_endpoints.m_ofs + pHeader->m_color_endpoints.m_size); + size = math::maximum(size, pHeader->m_color_selectors.m_ofs + pHeader->m_color_selectors.m_size); + size = math::maximum(size, pHeader->m_alpha_endpoints.m_ofs + pHeader->m_alpha_endpoints.m_size); + size = math::maximum(size, pHeader->m_alpha_selectors.m_ofs + pHeader->m_alpha_selectors.m_size); + size = math::maximum(size, pHeader->m_tables_ofs + pHeader->m_tables_size); + + return size; +} + +bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size) { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + const crn_header* pHeader = crnd_get_header(pData, data_size); + if (!pHeader) + return false; + + if (pHeader->m_flags & cCRNHeaderFlagSegmented) + return false; + + const uint actual_base_data_size = crnd_get_segmented_file_size(pData, data_size); + if 
(base_data_size < actual_base_data_size) + return false; + + memcpy(pBase_data, pData, actual_base_data_size); + + crn_header& new_header = *static_cast(pBase_data); + new_header.m_flags = new_header.m_flags | cCRNHeaderFlagSegmented; + new_header.m_data_size = actual_base_data_size; + + new_header.m_data_crc16 = crc16((const uint8*)pBase_data + new_header.m_header_size, new_header.m_data_size - new_header.m_header_size); + + new_header.m_header_crc16 = crc16(&new_header.m_data_size, new_header.m_header_size - (uint32)((const uint8*)&new_header.m_data_size - (const uint8*)&new_header)); + + CRND_ASSERT(crnd_validate_file(&new_header, actual_base_data_size, NULL)); + + return true; +} + +} // namespace unitycrnd + +// File: symbol_codec.cpp +namespace unitycrnd { +static_huffman_data_model::static_huffman_data_model() + : m_total_syms(0), + m_pDecode_tables(NULL) { +} + +static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) + : m_total_syms(0), + m_pDecode_tables(NULL) { + *this = other; +} + +static_huffman_data_model::~static_huffman_data_model() { + if (m_pDecode_tables) + crnd_delete(m_pDecode_tables); +} + +static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) { + if (this == &rhs) + return *this; + + m_total_syms = rhs.m_total_syms; + m_code_sizes = rhs.m_code_sizes; + if (m_code_sizes.get_alloc_failed()) { + clear(); + return *this; + } + + if (rhs.m_pDecode_tables) { + if (m_pDecode_tables) + *m_pDecode_tables = *rhs.m_pDecode_tables; + else + m_pDecode_tables = crnd_new(*rhs.m_pDecode_tables); + } else { + crnd_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + return *this; +} + +void static_huffman_data_model::clear() { + m_total_syms = 0; + m_code_sizes.clear(); + if (m_pDecode_tables) { + crnd_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } +} + +bool static_huffman_data_model::init(uint32 total_syms, const uint8* pCode_sizes, uint32 
code_size_limit) { + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + + if (!m_code_sizes.resize(total_syms)) + return false; + + uint32 min_code_size = cUINT32_MAX; + uint32 max_code_size = 0; + + for (uint32 i = 0; i < total_syms; i++) { + uint32 s = pCode_sizes[i]; + m_code_sizes[i] = static_cast(s); + min_code_size = math::minimum(min_code_size, s); + max_code_size = math::maximum(max_code_size, s); + } + + if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) + return false; + + if (max_code_size > code_size_limit) + return false; + + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); + + if (!m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits())) + return false; + + return true; +} + +bool static_huffman_data_model::prepare_decoder_tables() { + uint32 total_syms = m_code_sizes.size(); + + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); + + m_total_syms = total_syms; + + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); + + return m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits()); +} + +uint static_huffman_data_model::compute_decoder_table_bits() const { +#if CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE + return prefix_coding::cMaxTableBits; +#else + uint32 decoder_table_bits = 0; + if (m_total_syms > 16) + decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + return decoder_table_bits; +#endif +} + +symbol_codec::symbol_codec() + : m_pDecode_buf(NULL), + m_pDecode_buf_next(NULL), + m_pDecode_buf_end(NULL), + m_decode_buf_size(0), + m_bit_buf(0), + m_bit_count(0) { +} + +// Code length encoding symbols: +// 0-16 - actual code lengths +const uint32 cMaxCodelengthCodes = 21; + +const uint32 cSmallZeroRunCode = 
17; +const uint32 cLargeZeroRunCode = 18; +const uint32 cSmallRepeatCode = 19; +const uint32 cLargeRepeatCode = 20; + +const uint32 cMinSmallZeroRunSize = 3; +const uint32 cMaxSmallZeroRunSize = 10; +const uint32 cMinLargeZeroRunSize = 11; +const uint32 cMaxLargeZeroRunSize = 138; + +const uint32 cSmallMinNonZeroRunSize = 3; +const uint32 cSmallMaxNonZeroRunSize = 6; +const uint32 cLargeMinNonZeroRunSize = 7; +const uint32 cLargeMaxNonZeroRunSize = 70; + +const uint32 cSmallZeroRunExtraBits = 3; +const uint32 cLargeZeroRunExtraBits = 7; +const uint32 cSmallNonZeroRunExtraBits = 2; +const uint32 cLargeNonZeroRunExtraBits = 6; + +static const uint8 g_most_probable_codelength_codes[] = + { + cSmallZeroRunCode, cLargeZeroRunCode, + cSmallRepeatCode, cLargeRepeatCode, + + 0, 8, + 7, 9, + 6, 10, + 5, 11, + 4, 12, + 3, 13, + 2, 14, + 1, 15, + 16}; +const uint32 cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); + +bool symbol_codec::decode_receive_static_data_model(static_huffman_data_model& model) { + const uint32 total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); + + if (!total_used_syms) { + model.clear(); + return true; + } + + if (!model.m_code_sizes.resize(total_used_syms)) + return false; + + memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); + + const uint32 num_codelength_codes_to_send = decode_bits(5); + if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) + return false; + + static_huffman_data_model dm; + if (!dm.m_code_sizes.resize(cMaxCodelengthCodes)) + return false; + + for (uint32 i = 0; i < num_codelength_codes_to_send; i++) + dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); + + if (!dm.prepare_decoder_tables()) + return false; + + uint32 ofs = 0; + while (ofs < total_used_syms) { + const uint32 num_remaining = total_used_syms - ofs; + + uint32 
code = decode(dm); + if (code <= 16) + model.m_code_sizes[ofs++] = static_cast(code); + else if (code == cSmallZeroRunCode) { + uint32 len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } else if (code == cLargeZeroRunCode) { + uint32 len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) { + uint32 len; + if (code == cSmallRepeatCode) + len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; + else + len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; + + if ((!ofs) || (len > num_remaining)) + return false; + const uint32 prev = model.m_code_sizes[ofs - 1]; + if (!prev) + return false; + const uint32 end = ofs + len; + while (ofs < end) + model.m_code_sizes[ofs++] = static_cast(prev); + } else { + CRND_ASSERT(0); + return false; + } + } + + if (ofs != total_used_syms) + return false; + + return model.prepare_decoder_tables(); +} + +bool symbol_codec::start_decoding(const uint8* pBuf, uint32 buf_size) { + if (!buf_size) + return false; + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + get_bits_init(); + + return true; +} + +void symbol_codec::get_bits_init() { + m_bit_buf = 0; + m_bit_count = 0; +} + +uint32 symbol_codec::decode_bits(uint32 num_bits) { + if (!num_bits) + return 0; + + if (num_bits > 16) { + uint32 a = get_bits(num_bits - 16); + uint32 b = get_bits(16); + + return (a << 16) | b; + } else + return get_bits(num_bits); +} + +uint32 symbol_codec::get_bits(uint32 num_bits) { + CRND_ASSERT(num_bits <= 32U); + + while (m_bit_count < (int)num_bits) { + bit_buf_type c = 0; + if (m_pDecode_buf_next != m_pDecode_buf_end) + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + CRND_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (c << 
(cBitBufSize - m_bit_count)); + } + + uint32 result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + + return result; +} + +uint32 symbol_codec::decode(const static_huffman_data_model& model) { + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + if (m_bit_count < 24) { + if (m_bit_count < 16) { + uint32 c0 = 0, c1 = 0; + const uint8* p = m_pDecode_buf_next; + if (p < m_pDecode_buf_end) + c0 = *p++; + if (p < m_pDecode_buf_end) + c1 = *p++; + m_pDecode_buf_next = p; + m_bit_count += 16; + uint32 c = (c0 << 8) | c1; + m_bit_buf |= (c << (32 - m_bit_count)); + } else { + uint32 c = (m_pDecode_buf_next < m_pDecode_buf_end) ? *m_pDecode_buf_next++ : 0; + m_bit_count += 8; + m_bit_buf |= (c << (32 - m_bit_count)); + } + } + + uint32 k = (m_bit_buf >> 16) + 1; + uint32 sym, len; + + if (k <= pTables->m_table_max_code) { + uint32 t = pTables->m_lookup[m_bit_buf >> (32 - pTables->m_table_bits)]; + + CRND_ASSERT(t != cUINT32_MAX); + sym = t & cUINT16_MAX; + len = t >> 16; + + CRND_ASSERT(model.m_code_sizes[sym] == len); + } else { + len = pTables->m_decode_start_code_size; + + for (;;) { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + (m_bit_buf >> (32 - len)); + + if (((uint32)val_ptr >= model.m_total_syms)) { + // corrupted stream, or a bug + CRND_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + return sym; +} + +uint64 symbol_codec::stop_decoding() { + return static_cast(m_pDecode_buf_next - m_pDecode_buf); +} + +} // namespace unitycrnd + +// File: crnd_dxt.cpp +namespace unitycrnd { +const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = {0U, 3U, 1U, 2U}; +const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = {0U, 2U, 3U, 1U}; +const uint8 g_etc1_from_linear[cDXT1SelectorValues] = {3U, 2U, 0U, 1U}; + +const uint8 
g_dxt5_to_linear[cDXT5SelectorValues] = {0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U}; +const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = {0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U}; + +const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 5, 4, 3, 2, 6, 7}; +const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 7, 6, 5, 4, 3, 2}; + +uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint32 bias) { + uint32 r = color.r; + uint32 g = color.g; + uint32 b = color.b; + + if (scaled) { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = math::minimum(r, 31U); + g = math::minimum(g, 63U); + b = math::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 11U)); +} + +uint16 dxt1_block::pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias) { + return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); +} + +color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint32 alpha) { + uint32 b = packed_color & 31U; + uint32 g = (packed_color >> 5U) & 63U; + uint32 r = (packed_color >> 11U) & 31U; + + if (scaled) { + b = (b << 3U) | (b >> 2U); + g = (g << 2U) | (g >> 4U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(r, g, b, alpha); +} + +void dxt1_block::unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled) { + color_quad_u8 c(unpack_color(packed_color, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; +} + +uint32 dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); + pDst[3].set(0, 0, 0, 0); + + return 3; +} + +uint32 dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 
c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // 12/14/09 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? + // Turns out some GPU's round and some don't. Great. + //pDst[2].set( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); + //pDst[3].set( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); + + pDst[2].set((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); + pDst[3].set((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); + + return 4; +} + +uint32 dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) { + if (color0 > color1) + return get_block_colors4(pDst, color0, color1); + else + return get_block_colors3(pDst, color0, color1); +} + +color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha) { + CRND_ASSERT(index < 2); + return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); +} + +uint32 dxt1_block::pack_endpoints(uint32 lo, uint32 hi) { + CRND_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); + return lo | (hi << 16U); +} + +void dxt3_block::set_alpha(uint32 x, uint32 y, uint32 value, bool scaled) { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + if (scaled) { + CRND_ASSERT(value <= 0xFF); + value = (value * 15U + 128U) / 255U; + } else { + CRND_ASSERT(value <= 0xF); + } + + uint32 ofs = (y << 1U) + (x >> 1U); + uint32 c = m_alpha[ofs]; + + c &= ~(0xF << ((x & 1U) << 2U)); + c |= (value << ((x & 1U) << 2U)); + + m_alpha[ofs] = static_cast(c); +} + +uint32 dxt3_block::get_alpha(uint32 x, uint32 y, bool scaled) const { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + uint32 value = m_alpha[(y << 1U) + (x >> 1U)]; + if (x & 1) + value >>= 4; + value &= 0xF; + + if (scaled) + value = (value << 4U) | value; + + return value; +} + +uint32 
dxt5_block::get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h) { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 4 + h) / 5); + pDst[3].a = static_cast((l * 3 + h * 2) / 5); + pDst[4].a = static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; +} + +uint32 dxt5_block::get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h) { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; +} + +uint32 dxt5_block::get_block_values(color_quad_u8* pDst, uint32 l, uint32 h) { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); +} + +uint32 dxt5_block::get_block_values6(uint32* pDst, uint32 l, uint32 h) { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 4 + h) / 5; + pDst[3] = (l * 3 + h * 2) / 5; + pDst[4] = (l * 2 + h * 3) / 5; + pDst[5] = (l + h * 4) / 5; + pDst[6] = 0; + pDst[7] = 255; + return 6; +} + +uint32 dxt5_block::get_block_values8(uint32* pDst, uint32 l, uint32 h) { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 6 + h) / 7; + pDst[3] = (l * 5 + h * 2) / 7; + pDst[4] = (l * 4 + h * 3) / 7; + pDst[5] = (l * 3 + h * 4) / 7; + pDst[6] = (l * 2 + h * 5) / 7; + pDst[7] = (l + h * 6) / 7; + return 8; +} + +uint32 dxt5_block::unpack_endpoint(uint32 packed, uint32 index) { + CRND_ASSERT(index < 2); + return (packed >> (8 * index)) & 0xFF; +} + +uint32 dxt5_block::pack_endpoints(uint32 lo, uint32 hi) { + CRND_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); + return lo | (hi << 8U); +} + +uint32 dxt5_block::get_block_values(uint32* pDst, uint32 l, uint32 h) { + if (l > h) + return get_block_values8(pDst, l, h); + else + return 
get_block_values6(pDst, l, h); +} + +} // namespace unitycrnd + +// File: crnd_decode.cpp + +namespace unitycrnd { + +class crn_unpacker { + public: + inline crn_unpacker() + : m_magic(cMagicValue), + m_pData(NULL), + m_data_size(0), + m_pHeader(NULL) { + } + + inline ~crn_unpacker() { + m_magic = 0; + } + + inline bool is_valid() const { return m_magic == cMagicValue; } + + bool init(const void* pData, uint32 data_size) { + m_pHeader = crnd_get_header(pData, data_size); + if (!m_pHeader) + return false; + + m_pData = static_cast(pData); + m_data_size = data_size; + + if (!init_tables()) + return false; + + if (!decode_palettes()) + return false; + + return true; + } + + bool unpack_level( + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) { + uint32 cur_level_ofs = m_pHeader->m_level_ofs[level_index]; + + uint32 next_level_ofs = m_data_size; + if ((level_index + 1) < (m_pHeader->m_levels)) + next_level_ofs = m_pHeader->m_level_ofs[level_index + 1]; + + CRND_ASSERT(next_level_ofs > cur_level_ofs); + + return unpack_level(m_pData + cur_level_ofs, next_level_ofs - cur_level_ofs, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool unpack_level( + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) { + +#ifdef CRND_BUILD_DEBUG + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + if (!pDst[f]) + return false; +#endif + + const uint32 width = math::maximum(m_pHeader->m_width >> level_index, 1U); + const uint32 height = math::maximum(m_pHeader->m_height >> level_index, 1U); + const uint32 blocks_x = (width + 3U) >> 2U; + const uint32 blocks_y = (height + 3U) >> 2U; + const uint32 block_size = m_pHeader->m_format == cCRNFmtDXT1 || m_pHeader->m_format == cCRNFmtDXT5A || m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; + + uint32 minimal_row_pitch = block_size * blocks_x; + if (!row_pitch_in_bytes) + row_pitch_in_bytes = minimal_row_pitch; + else if ((row_pitch_in_bytes < minimal_row_pitch) || (row_pitch_in_bytes & 3)) + return false; + if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) + return false; + + if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) + return false; + + bool status = false; + switch (m_pHeader->m_format) { + case cCRNFmtDXT1: + case cCRNFmtETC1S: + status = unpack_dxt1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtDXT5: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtDXT5_xGxR: + case cCRNFmtETC2AS: + status = unpack_dxt5((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtDXT5A: + status = unpack_dxt5a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + status = unpack_dxn((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtETC1: + status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtETC2: + status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + case cCRNFmtETC2A: + status = unpack_etc2a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); + break; + default: + return false; + } + if (!status) + return false; + + m_codec.stop_decoding(); + return true; + } + + inline const void* get_data() const { return m_pData; } + inline uint32 get_data_size() const { return m_data_size; } + + private: + enum { cMagicValue = 0x1EF9CABD }; + uint32 m_magic; + + const uint8* m_pData; + uint32 m_data_size; + const crn_header* m_pHeader; + + symbol_codec m_codec; + + static_huffman_data_model m_reference_encoding_dm; + static_huffman_data_model m_endpoint_delta_dm[2]; + static_huffman_data_model m_selector_delta_dm[2]; + + unitycrnd::vector m_color_endpoints; + unitycrnd::vector 
m_color_selectors; + + unitycrnd::vector m_alpha_endpoints; + unitycrnd::vector m_alpha_selectors; + + struct block_buffer_element { + uint16 endpoint_reference; + uint16 color_endpoint_index; + uint16 alpha0_endpoint_index; + uint16 alpha1_endpoint_index; + }; + unitycrnd::vector m_block_buffer; + + bool init_tables() { + if (!m_codec.start_decoding(m_pData + m_pHeader->m_tables_ofs, m_pHeader->m_tables_size)) + return false; + + if (!m_codec.decode_receive_static_data_model(m_reference_encoding_dm)) + return false; + + if ((!m_pHeader->m_color_endpoints.m_num) && (!m_pHeader->m_alpha_endpoints.m_num)) + return false; + + if (m_pHeader->m_color_endpoints.m_num) { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[0])) + return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[0])) + return false; + } + + if (m_pHeader->m_alpha_endpoints.m_num) { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[1])) + return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[1])) + return false; + } + + m_codec.stop_decoding(); + + return true; + } + + bool decode_palettes() { + if (m_pHeader->m_color_endpoints.m_num) { + if (!decode_color_endpoints()) + return false; + if (!decode_color_selectors()) + return false; + } + + if (m_pHeader->m_alpha_endpoints.m_num) { + if (!decode_alpha_endpoints()) + return false; + if (!(m_pHeader->m_format == cCRNFmtETC2AS ? decode_alpha_selectors_etcs() : m_pHeader->m_format == cCRNFmtETC2A ? 
decode_alpha_selectors_etc() : decode_alpha_selectors())) + return false; + } + + return true; + } + + bool decode_color_endpoints() { + const uint32 num_color_endpoints = m_pHeader->m_color_endpoints.m_num; + const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; + const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; + + if (!m_color_endpoints.resize(num_color_endpoints)) + return false; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_endpoints.m_ofs, m_pHeader->m_color_endpoints.m_size)) + return false; + + static_huffman_data_model dm[2]; + for (uint32 i = 0; i < (has_etc_color_blocks ? 1 : 2); i++) + if (!m_codec.decode_receive_static_data_model(dm[i])) + return false; + + uint32 a = 0, b = 0, c = 0; + uint32 d = 0, e = 0, f = 0; + + uint32* CRND_RESTRICT pDst = &m_color_endpoints[0]; + + for (uint32 i = 0; i < num_color_endpoints; i++) { + if (has_etc_color_blocks) { + for (b = 0; b < 32; b += 8) + a += m_codec.decode(dm[0]) << b; + a &= 0x1F1F1F1F; + *pDst++ = has_subblocks ? 
a : (a & 0x07000000) << 5 | (a & 0x07000000) << 2 | 0x02000000 | (a & 0x001F1F1F) << 3; + } else { + a = (a + m_codec.decode(dm[0])) & 31; + b = (b + m_codec.decode(dm[1])) & 63; + c = (c + m_codec.decode(dm[0])) & 31; + d = (d + m_codec.decode(dm[0])) & 31; + e = (e + m_codec.decode(dm[1])) & 63; + f = (f + m_codec.decode(dm[0])) & 31; + *pDst++ = c | (b << 5U) | (a << 11U) | (f << 16U) | (e << 21U) | (d << 27U); + } + } + + m_codec.stop_decoding(); + + return true; + } + + bool decode_color_selectors() { + const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; + const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; + m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_color_selectors.resize(m_pHeader->m_color_selectors.m_num << (has_subblocks ? 1 : 0)); + for (uint32 s = 0, i = 0; i < m_pHeader->m_color_selectors.m_num; i++) { + for (uint32 j = 0; j < 32; j += 4) + s ^= m_codec.decode(dm) << j; + if (has_etc_color_blocks) { + for (uint32 selector = (~s & 0xAAAAAAAA) | (~(s ^ s >> 1) & 0x55555555), t = 8, h = 0; h < 4; h++, t -= 15) { + for (uint32 w = 0; w < 4; w++, t += 4) { + if (has_subblocks) { + uint32 s0 = selector >> (w << 3 | h << 1); + m_color_selectors[i << 1] |= ((s0 >> 1 & 1) | (s0 & 1) << 16) << (t & 15); + } + uint32 s1 = selector >> (h << 3 | w << 1); + m_color_selectors[has_subblocks ? 
i << 1 | 1 : i] |= ((s1 >> 1 & 1) | (s1 & 1) << 16) << (t & 15); + } + } + } else { + m_color_selectors[i] = ((s ^ s << 1) & 0xAAAAAAAA) | (s >> 1 & 0x55555555); + } + } + m_codec.stop_decoding(); + return true; + } + + bool decode_alpha_endpoints() { + const uint32 num_alpha_endpoints = m_pHeader->m_alpha_endpoints.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_endpoints.m_ofs, m_pHeader->m_alpha_endpoints.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + if (!m_alpha_endpoints.resize(num_alpha_endpoints)) + return false; + + uint16* CRND_RESTRICT pDst = &m_alpha_endpoints[0]; + uint32 a = 0, b = 0; + + for (uint32 i = 0; i < num_alpha_endpoints; i++) { + a = (a + m_codec.decode(dm)) & 255; + b = (b + m_codec.decode(dm)) & 255; + *pDst++ = (uint16)(a | (b << 8)); + } + + m_codec.stop_decoding(); + + return true; + } + + bool decode_alpha_selectors() { + m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); + uint8 dxt5_from_linear[64]; + for (uint32 i = 0; i < 64; i++) + dxt5_from_linear[i] = g_dxt5_from_linear[i & 7] | g_dxt5_from_linear[i >> 3] << 3; + for (uint32 s0_linear = 0, s1_linear = 0, i = 0; i < m_alpha_selectors.size();) { + uint32 s0 = 0, s1 = 0; + for (uint32 j = 0; j < 24; s0 |= dxt5_from_linear[s0_linear >> j & 0x3F] << j, j += 6) + s0_linear ^= m_codec.decode(dm) << j; + for (uint32 j = 0; j < 24; s1 |= dxt5_from_linear[s1_linear >> j & 0x3F] << j, j += 6) + s1_linear ^= m_codec.decode(dm) << j; + m_alpha_selectors[i++] = s0; + m_alpha_selectors[i++] = s0 >> 16 | s1 << 8; + m_alpha_selectors[i++] = s1 >> 8; + } + m_codec.stop_decoding(); + return true; + } + + bool decode_alpha_selectors_etc() { + m_codec.start_decoding(m_pData + 
m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 6); + uint8 s_linear[8] = {}; + uint8* data = (uint8*)m_alpha_selectors.begin(); + for (uint i = 0; i < m_alpha_selectors.size(); i += 6, data += 12) { + for (uint s_group = 0, p = 0; p < 16; p++) { + s_group = p & 1 ? s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); + uint8 s = s_group & 7; + if (s <= 3) + s = 3 - s; + uint8 d = 3 * (p + 1); + uint8 byte_offset = d >> 3; + uint8 bit_offset = d & 7; + data[byte_offset] |= s << (8 - bit_offset); + if (bit_offset < 3) + data[byte_offset - 1] |= s >> bit_offset; + d += 9 * ((p & 3) - (p >> 2)); + byte_offset = d >> 3; + bit_offset = d & 7; + data[byte_offset + 6] |= s << (8 - bit_offset); + if (bit_offset < 3) + data[byte_offset + 5] |= s >> bit_offset; + } + } + m_codec.stop_decoding(); + return true; + } + + bool decode_alpha_selectors_etcs() { + m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); + static_huffman_data_model dm; + m_codec.decode_receive_static_data_model(dm); + m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); + uint8 s_linear[8] = {}; + uint8* data = (uint8*)m_alpha_selectors.begin(); + for (uint i = 0; i < (m_alpha_selectors.size() << 1); i += 6) { + for (uint s_group = 0, p = 0; p < 16; p++) { + s_group = p & 1 ? 
s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); + uint8 s = s_group & 7; + if (s <= 3) + s = 3 - s; + uint8 d = 3 * (p + 1) + 9 * ((p & 3) - (p >> 2)); + uint8 byte_offset = d >> 3; + uint8 bit_offset = d & 7; + data[i + byte_offset] |= s << (8 - bit_offset); + if (bit_offset < 3) + data[i + byte_offset - 1] |= s >> bit_offset; + } + } + m_codec.stop_decoding(); + return true; + } + + static inline uint32 tiled_offset_2d_outer(uint32 y, uint32 AlignedWidth, uint32 LogBpp) { + uint32 Macro = ((y >> 5) * (AlignedWidth >> 5)) << (LogBpp + 7); + uint32 Micro = ((y & 6) << 2) << LogBpp; + + return Macro + + ((Micro & ~15) << 1) + + (Micro & 15) + + ((y & 8) << (3 + LogBpp)) + ((y & 1) << 4); + } + + static inline uint32 tiled_offset_2d_inner(uint32 x, uint32 y, uint32 LogBpp, uint32 BaseOffset) { + uint32 Macro = (x >> 5) << (LogBpp + 7); + uint32 Micro = (x & 7) << LogBpp; + uint32 Offset = BaseOffset + Macro + ((Micro & ~15) << 1) + (Micro & 15); + + return ((Offset & ~511) << 3) + ((Offset & 448) << 2) + (Offset & 63) + + ((y & 16) << 7) + + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); + } + + static inline void limit(uint& x, uint n) { + int v = x - n; + int msk = (v >> 31); + x = (x & msk) | (v & ~msk); + } + + bool unpack_dxt1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); + + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 color_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) { + visible = visible && x < 
output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element &buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) { + endpoint_reference = buffer.endpoint_reference; + } else { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + } else if (endpoint_reference == 1) { + buffer.color_endpoint_index = color_endpoint_index; + } else { + color_endpoint_index = buffer.color_endpoint_index; + } + uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); + if (visible) { + pData[0] = m_color_endpoints[color_endpoint_index]; + pData[1] = m_color_selectors[color_selector_index]; + } + } + } + } + return true; + } + + bool unpack_dxt5(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); + + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 color_endpoint_index = 0; + uint32 alpha0_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = 
m_codec.decode(m_reference_encoding_dm); + block_buffer_element &buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) { + endpoint_reference = buffer.endpoint_reference; + } else { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } else if (endpoint_reference == 1) { + buffer.color_endpoint_index = color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } else { + color_endpoint_index = buffer.color_endpoint_index; + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + } + uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (visible) { + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); + pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + pData[2] = m_color_endpoints[color_endpoint_index]; + pData[3] = m_color_selectors[color_selector_index]; + } + } + } + } + return true; + } + + bool unpack_dxn(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); + + if 
(m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 alpha0_endpoint_index = 0; + uint32 alpha1_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 4) { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element &buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) { + endpoint_reference = buffer.endpoint_reference; + } else { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) { + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + alpha1_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha1_endpoint_index >= num_alpha_endpoints) + alpha1_endpoint_index -= num_alpha_endpoints; + buffer.alpha1_endpoint_index = alpha1_endpoint_index; + } else if (endpoint_reference == 1) { + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + buffer.alpha1_endpoint_index = alpha1_endpoint_index; + } else { + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + alpha1_endpoint_index = buffer.alpha1_endpoint_index; + } + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + uint32 alpha1_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (visible) { + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; + const uint16* pAlpha1_selectors = &m_alpha_selectors[alpha1_selector_index * 3]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | 
(pAlpha0_selectors[0] << 16); + pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + pData[2] = m_alpha_endpoints[alpha1_endpoint_index] | (pAlpha1_selectors[0] << 16); + pData[3] = pAlpha1_selectors[1] | (pAlpha1_selectors[2] << 16); + } + } + } + } + return true; + } + + bool unpack_dxt5a(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 1); + + if (m_block_buffer.size() < width) + m_block_buffer.resize(width); + + uint32 alpha0_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) { + visible = visible && x < output_width; + if (!(y & 1) && !(x & 1)) + reference_group = m_codec.decode(m_reference_encoding_dm); + block_buffer_element &buffer = m_block_buffer[x]; + uint8 endpoint_reference; + if (y & 1) { + endpoint_reference = buffer.endpoint_reference; + } else { + endpoint_reference = reference_group & 3; + reference_group >>= 2; + buffer.endpoint_reference = reference_group & 3; + reference_group >>= 2; + } + if (!endpoint_reference) { + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } else if (endpoint_reference == 1) { + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } else { + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + } + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (visible) { + const uint16* pAlpha0_selectors = 
&m_alpha_selectors[alpha0_selector_index * 3]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); + pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + } + } + } + } + return true; + } + + bool unpack_etc1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); + + if (m_block_buffer.size() < width << 1) + m_block_buffer.resize(width << 1); + + uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, pData += 2) { + visible = visible && x < output_width; + block_buffer_element &buffer = m_block_buffer[x << 1]; + uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; + if (y & 1) { + endpoint_reference = buffer.endpoint_reference; + } else { + reference_group = m_codec.decode(m_reference_encoding_dm); + endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); + buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); + } + if (!(endpoint_reference & 3)) { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + } else if ((endpoint_reference & 3) == 1) { + buffer.color_endpoint_index = color_endpoint_index; + } else if ((endpoint_reference & 3) == 3) { + buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; + } else { + color_endpoint_index = 
buffer.color_endpoint_index; + } + endpoint_reference >>= 2; + *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; + uint32 selector_index = m_codec.decode(m_selector_delta_dm[0]); + if (endpoint_reference) { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + } + diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; + m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; + *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; + if (visible) { + uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; + for (uint c = 0; diff && c < 3; c++) + diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; + for (uint c = 0; c < 3; c++) + block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; + block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; + pData[0] = *(uint32*)&block_endpoint; + pData[1] = m_color_selectors[selector_index << 1 | flip]; + } + } + } + } + return true; + } + + bool unpack_etc2a(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 width = output_width + 1 & ~1; + const uint32 height = output_height + 1 & ~1; + const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 2); + + if (m_block_buffer.size() < width << 1) + m_block_buffer.resize(width << 1); + + uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0, alpha0_endpoint_index = 0, diagonal_alpha0_endpoint_index = 0; + uint8 reference_group = 0; + + for (uint32 f = 0; f < m_pHeader->m_faces; f++) { + uint32* pData = (uint32*)pDst[f]; + for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { + bool visible = y < output_height; + for (uint32 x = 0; x < width; x++, 
pData += 4) { + visible = visible && x < output_width; + block_buffer_element &buffer = m_block_buffer[x << 1]; + uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; + if (y & 1) { + endpoint_reference = buffer.endpoint_reference; + } else { + reference_group = m_codec.decode(m_reference_encoding_dm); + endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); + buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); + } + if (!(endpoint_reference & 3)) { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); + if (alpha0_endpoint_index >= num_alpha_endpoints) + alpha0_endpoint_index -= num_alpha_endpoints; + buffer.color_endpoint_index = color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } else if ((endpoint_reference & 3) == 1) { + buffer.color_endpoint_index = color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index; + } else if ((endpoint_reference & 3) == 3) { + buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; + buffer.alpha0_endpoint_index = alpha0_endpoint_index = diagonal_alpha0_endpoint_index; + } else { + color_endpoint_index = buffer.color_endpoint_index; + alpha0_endpoint_index = buffer.alpha0_endpoint_index; + } + endpoint_reference >>= 2; + *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; + uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); + uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); + if (endpoint_reference) { + color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); + if (color_endpoint_index >= num_color_endpoints) + color_endpoint_index -= num_color_endpoints; + } + *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; + diagonal_color_endpoint_index = m_block_buffer[x << 1 
| 1].color_endpoint_index; + diagonal_alpha0_endpoint_index = m_block_buffer[x << 1 | 1].alpha0_endpoint_index; + m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; + m_block_buffer[x << 1 | 1].alpha0_endpoint_index = alpha0_endpoint_index; + if (visible) { + uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; + for (uint c = 0; diff && c < 3; c++) + diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; + for (uint c = 0; c < 3; c++) + block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; + block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; + const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 6 + (flip ? 3 : 0)]; + pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | pAlpha0_selectors[0] << 16; + pData[1] = pAlpha0_selectors[1] | pAlpha0_selectors[2] << 16; + pData[2] = *(uint32*)&block_endpoint; + pData[3] = m_color_selectors[color_selector_index << 1 | flip]; + } + } + } + } + return true; + } + +}; + +crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size) { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + crn_unpacker* p = crnd_new(); + if (!p) + return NULL; + + if (!p->init(pData, data_size)) { + crnd_delete(p); + return NULL; + } + + return p; +} + +bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size) { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + if (ppData) + *ppData = pUnpacker->get_data(); + + if (pData_size) + *pData_size = pUnpacker->get_data_size(); + + return true; +} + +bool crnd_unpack_level( + crnd_unpack_context pContext, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) { + if ((!pContext) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); 
+ + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); +} + +bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) { + if ((!pContext) || (!pSrc) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pSrc, src_size_in_bytes, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); +} + +bool crnd_unpack_end(crnd_unpack_context pContext) { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + crnd_delete(pUnpacker); + + return true; +} + +} // namespace unitycrnd + +#endif // CRND_INCLUDE_CRND_H + +//------------------------------------------------------------------------------ +// +// crn_decomp.h uses the ZLIB license: +// http://opensource.org/licenses/Zlib +// +// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// +// 2. 
Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source distribution. +// +//------------------------------------------------------------------------------ diff --git a/Texture2DDecoder/unitycrunch/crn_defs.h b/Texture2DDecoder/unitycrunch/crn_defs.h new file mode 100644 index 0000000..8d018e1 --- /dev/null +++ b/Texture2DDecoder/unitycrunch/crn_defs.h @@ -0,0 +1,291 @@ +#ifndef CRND_INCLUDE_CRN_DEFS_H +#define CRND_INCLUDE_CRN_DEFS_H + +// Include crnlib.h (only to bring in some basic CRN-related types). +#include "crnlib.h" + +#define CRND_LIB_VERSION 104 +#define CRND_VERSION_STRING "01.04" + +#ifdef _DEBUG +#define CRND_BUILD_DEBUG +#else +#define CRND_BUILD_RELEASE +#endif + +// CRN decompression API +namespace unitycrnd { +typedef unsigned char uint8; +typedef signed char int8; +typedef unsigned short uint16; +typedef signed short int16; +typedef unsigned int uint32; +typedef uint32 uint32; +typedef unsigned int uint; +typedef signed int int32; +#ifdef __GNUC__ +typedef unsigned long long uint64; +typedef long long int64; +#else +typedef unsigned __int64 uint64; +typedef signed __int64 int64; +#endif + +// The crnd library assumes all allocation blocks have at least CRND_MIN_ALLOC_ALIGNMENT alignment. +const uint32 CRND_MIN_ALLOC_ALIGNMENT = sizeof(uint32) * 2U; + +// realloc callback: +// Used to allocate, resize, or free memory blocks. +// If p is NULL, the realloc function attempts to allocate a block of at least size bytes. Returns NULL on out of memory. +// *pActual_size must be set to the actual size of the allocated block, which must be greater than or equal to the requested size. +// If p is not NULL, and size is 0, the realloc function frees the specified block, and always returns NULL. *pActual_size should be set to 0. 
+// If p is not NULL, and size is non-zero, the realloc function attempts to resize the specified block: +// If movable is false, the realloc function attempts to shrink or expand the block in-place. NULL is returned if the block cannot be resized in place, or if the +// underlying heap implementation doesn't support in-place resizing. Otherwise, the pointer to the original block is returned. +// If movable is true, it is permissible to move the block's contents if it cannot be resized in place. NULL is returned if the block cannot be resized in place, and there +// is not enough memory to relocate the block. +// In all cases, *pActual_size must be set to the actual size of the allocated block, whether it was successfully resized or not. +typedef void* (*crnd_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); + +// msize callback: Returns the size of the memory block in bytes, or 0 if the pointer or block is invalid. +typedef size_t (*crnd_msize_func)(void* p, void* pUser_data); + +// crnd_set_memory_callbacks() - Use to override the crnd library's memory allocation functions. +// If any input parameters are NULL, the memory callback functions are reset to the default functions. +// The default functions call malloc(), free(), _msize(), _expand(), etc. 
+void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data); + +struct crn_file_info { + inline crn_file_info() + : m_struct_size(sizeof(crn_file_info)) {} + + uint32 m_struct_size; + uint32 m_actual_data_size; + uint32 m_header_size; + uint32 m_total_palette_size; + uint32 m_tables_size; + uint32 m_levels; + uint32 m_level_compressed_size[cCRNMaxLevels]; + uint32 m_color_endpoint_palette_entries; + uint32 m_color_selector_palette_entries; + uint32 m_alpha_endpoint_palette_entries; + uint32 m_alpha_selector_palette_entries; +}; + +struct crn_texture_info { + inline crn_texture_info() + : m_struct_size(sizeof(crn_texture_info)) {} + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_levels; + uint32 m_faces; + uint32 m_bytes_per_block; + uint32 m_userdata0; + uint32 m_userdata1; + crn_format m_format; +}; + +struct crn_level_info { + inline crn_level_info() + : m_struct_size(sizeof(crn_level_info)) {} + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_faces; + uint32 m_blocks_x; + uint32 m_blocks_y; + uint32 m_bytes_per_block; + crn_format m_format; +}; + +// Returns the FOURCC format code corresponding to the specified CRN format. +uint32 crnd_crn_format_to_fourcc(crn_format fmt); + +// Returns the fundamental GPU format given a potentially swizzled DXT5 crn_format. +crn_format crnd_get_fundamental_dxt_format(crn_format fmt); + +// Returns the size of the crn_format in bits/texel (either 4 or 8). +uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt); + +// Returns the number of bytes per DXTn block (8 or 16). +uint32 crnd_get_bytes_per_dxt_block(crn_format fmt); + +// Validates the entire file by checking the header and data CRC's. +// This is not something you want to be doing much! +// The crn_file_info.m_struct_size field must be set before calling this function. 
+bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info); + +// Retrieves texture information from the CRN file. +// The crn_texture_info.m_struct_size field must be set before calling this function. +bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pTexture_info); + +// Retrieves mipmap level specific information from the CRN file. +// The crn_level_info.m_struct_size field must be set before calling this function. +bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info); + +// Transcode/unpack context handle. +typedef void* crnd_unpack_context; + +// crnd_unpack_begin() - Decompresses the texture's decoder tables and endpoint/selector palettes. +// Once you call this function, you may call crnd_unpack_level() to unpack one or more mip levels. +// Don't call this once per mip level (unless you absolutely must)! +// This function allocates enough memory to hold: Huffman decompression tables, and the endpoint/selector palettes (color and/or alpha). +// Worst case allocation is approx. 200k, assuming all palettes contain 8192 entries. +// pData must point to a buffer holding all of the compressed .CRN file data. +// This buffer must be stable until crnd_unpack_end() is called. +// Returns NULL if out of memory, or if any of the input parameters are invalid. +crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size); + +// Returns a pointer to the compressed .CRN data associated with a crnd_unpack_context. +// Returns false if any of the input parameters are invalid. +bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size); + +// crnd_unpack_level() - Transcodes the specified mipmap level to a destination buffer in cached or write combined memory. +// pContext - Context created by a call to crnd_unpack_begin(). +// ppDst - A pointer to an array of 1 or 6 destination buffer pointers. 
Cubemaps require an array of 6 pointers, 2D textures require an array of 1 pointer. +// dst_size_in_bytes - Optional size of each destination buffer. Only used for debugging - OK to set to UINT32_MAX. +// row_pitch_in_bytes - The pitch in bytes from one row of DXT blocks to the next. Must be a multiple of 4. +// level_index - mipmap level index, where 0 is the largest/first level. +// Returns false if any of the input parameters, or the compressed stream, are invalid. +// This function does not allocate any memory. +bool crnd_unpack_level( + crnd_unpack_context pContext, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + +// crnd_unpack_level_segmented() - Unpacks the specified mipmap level from a "segmented" CRN file. +// See the crnd_create_segmented_file() API below. +// Segmented files allow the user to control where the compressed mipmap data is stored. +bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + +// crnd_unpack_end() - Frees the decompress tables and unpacked palettes associated with the specified unpack context. +// Returns false if the context is NULL, or if it points to an invalid context. +// This function frees all memory associated with the context. +bool crnd_unpack_end(crnd_unpack_context pContext); + +// The following API's allow the user to create "segmented" CRN files. A segmented file contains multiple pieces: +// - Base data: Header + compression tables +// - Level data: Individual mipmap levels +// This allows mipmap levels from multiple CRN files to be tightly packed together into single files. + +// Returns a pointer to the level's compressed data, and optionally returns the level's compressed data size if pSize is not NULL. 
+const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize); + +// Returns the compressed size of the texture's header and compression tables (but no levels). +uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size); + +// Creates a "segmented" CRN texture from a normal CRN texture. The new texture will be created at pBase_data, and will be crnd_get_base_data_size() bytes long. +// base_data_size must be >= crnd_get_base_data_size(). +// The base data will contain the CRN header and compression tables, but no mipmap data. +bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size); + +} // namespace unitycrnd + +// Low-level CRN file header cracking. +namespace unitycrnd { +template +struct crn_packed_uint { + inline crn_packed_uint() {} + + inline crn_packed_uint(unsigned int val) { *this = val; } + + inline crn_packed_uint(const crn_packed_uint& other) { *this = other; } + + inline crn_packed_uint& operator=(const crn_packed_uint& rhs) { + if (this != &rhs) + memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); + return *this; + } + + inline crn_packed_uint& operator=(unsigned int val) { + //CRND_ASSERT((N == 4U) || (val < (1U << (N * 8U)))); + + val <<= (8U * (4U - N)); + + for (unsigned int i = 0; i < N; i++) { + m_buf[i] = static_cast(val >> 24U); + val <<= 8U; + } + + return *this; + } + + inline operator unsigned int() const { + switch (N) { + case 1: + return m_buf[0]; + case 2: + return (m_buf[0] << 8U) | m_buf[1]; + case 3: + return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); + default: + return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); + } + } + + unsigned char m_buf[N]; +}; + +#pragma pack(push) +#pragma pack(1) +struct crn_palette { + crn_packed_uint<3> m_ofs; + crn_packed_uint<3> m_size; + crn_packed_uint<2> m_num; +}; + +enum crn_header_flags { + // If set, the compressed mipmap level data is not located after the 
file's base data - it will be separately managed by the user instead. + cCRNHeaderFlagSegmented = 1 +}; + +struct crn_header { + enum { cCRNSigValue = ('H' << 8) | 'x' }; + + crn_packed_uint<2> m_sig; + crn_packed_uint<2> m_header_size; + crn_packed_uint<2> m_header_crc16; + + crn_packed_uint<4> m_data_size; + crn_packed_uint<2> m_data_crc16; + + crn_packed_uint<2> m_width; + crn_packed_uint<2> m_height; + + crn_packed_uint<1> m_levels; + crn_packed_uint<1> m_faces; + + crn_packed_uint<1> m_format; + crn_packed_uint<2> m_flags; + + crn_packed_uint<4> m_reserved; + crn_packed_uint<4> m_userdata0; + crn_packed_uint<4> m_userdata1; + + crn_palette m_color_endpoints; + crn_palette m_color_selectors; + + crn_palette m_alpha_endpoints; + crn_palette m_alpha_selectors; + + crn_packed_uint<2> m_tables_size; + crn_packed_uint<3> m_tables_ofs; + + // m_level_ofs[] is actually an array of offsets: m_level_ofs[m_levels] + crn_packed_uint<4> m_level_ofs[1]; +}; + +const unsigned int cCRNHeaderMinSize = 62U; + +#pragma pack(pop) + +} // namespace unitycrnd + +#endif // CRND_INCLUDE_CRN_DEFS_H diff --git a/Texture2DDecoder/unitycrunch/crnlib.h b/Texture2DDecoder/unitycrunch/crnlib.h new file mode 100644 index 0000000..d1186d5 --- /dev/null +++ b/Texture2DDecoder/unitycrunch/crnlib.h @@ -0,0 +1,640 @@ +// File: crnlib.h - Advanced DXTn texture compression library. +// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC +// See copyright notice and license at the end of this file. +// +// This header file contains the public crnlib declarations for DXTn, +// clustered DXTn, and CRN compression/decompression. +// +// Note: This library does NOT need to be linked into your game executable if +// all you want to do is transcode .CRN files to raw DXTn bits at run-time. +// The crn_decomp.h header file library contains all the code necessary for +// decompression. 
+//
+// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing
+#ifndef CRNLIB_H
+#define CRNLIB_H
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+#define CRNLIB_VERSION 104
+
+#define CRNLIB_SUPPORT_ATI_COMPRESS 0
+#define CRNLIB_SUPPORT_SQUISH 0
+
+typedef unsigned char crn_uint8;
+typedef unsigned short crn_uint16;
+typedef unsigned int crn_uint32;
+typedef signed char crn_int8;
+typedef signed short crn_int16;
+typedef signed int crn_int32;
+typedef unsigned int crn_bool;
+
+// crnlib can compress to these file types.
+enum crn_file_type {
+  // .CRN
+  cCRNFileTypeCRN = 0,
+
+  // .DDS using regular DXT or clustered DXT
+  cCRNFileTypeDDS,
+
+  cCRNFileTypeForceDWORD = 0xFFFFFFFF
+};
+
+// Supported compressed pixel formats.
+// Basically all the standard DX9 formats, with some swizzled DXT5 formats
+// (most of them supported by ATI's Compressonator), along with some ATI/X360 GPU specific formats.
+enum crn_format {
+  cCRNFmtInvalid = -1,
+
+  cCRNFmtDXT1 = 0,
+
+  cCRNFmtFirstValid = cCRNFmtDXT1,
+
+  // cCRNFmtDXT3 is not currently supported when writing to CRN - only DDS.
+  cCRNFmtDXT3,
+
+  cCRNFmtDXT5,
+
+  // Various DXT5 derivatives
+  cCRNFmtDXT5_CCxY, // Luma-chroma
+  cCRNFmtDXT5_xGxR, // Swizzled 2-component
+  cCRNFmtDXT5_xGBR, // Swizzled 3-component
+  cCRNFmtDXT5_AGBR, // Swizzled 4-component
+
+  // ATI 3DC and X360 DXN
+  cCRNFmtDXN_XY,
+  cCRNFmtDXN_YX,
+
+  // DXT5 alpha blocks only
+  cCRNFmtDXT5A,
+
+  cCRNFmtETC1,
+  cCRNFmtETC2,
+  cCRNFmtETC2A,
+  cCRNFmtETC1S,
+  cCRNFmtETC2AS,
+
+  cCRNFmtTotal,
+
+  cCRNFmtForceDWORD = 0xFFFFFFFF
+};
+
+// Various library/file format limits.
+enum crn_limits {
+  // Max. mipmap level resolution on any axis.
+  cCRNMaxLevelResolution = 4096,
+
+  cCRNMinPaletteSize = 8,
+  cCRNMaxPaletteSize = 8192,
+
+  cCRNMaxFaces = 6,
+  cCRNMaxLevels = 16,
+
+  cCRNMaxHelperThreads = 15,
+
+  cCRNMinQualityLevel = 0,
+  cCRNMaxQualityLevel = 255
+};
+
+// CRN/DDS compression flags.
+// See the m_flags member in the crn_comp_params struct, below.
+enum crn_comp_flags {
+  // Enables perceptual colorspace distance metrics if set.
+  // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps!
+  // Default: Set
+  cCRNCompFlagPerceptual = 1,
+
+  // Enables (up to) 8x8 macroblock usage if set. If disabled, only 4x4 blocks are allowed.
+  // Compression ratio will be lower when disabled, but may cut down on blocky artifacts because the process used to determine
+  // where large macroblocks can be used without artifacts isn't perfect.
+  // Default: Set.
+  cCRNCompFlagHierarchical = 2,
+
+  // cCRNCompFlagQuick disables several output file optimizations - intended for things like quicker previews.
+  // Default: Not set.
+  cCRNCompFlagQuick = 4,
+
+  // DXT1: OK to use DXT1 alpha blocks for better quality or DXT1A transparency.
+  // DXT5: OK to use both DXT5 block types.
+  // Currently only used when writing to .DDS files, as .CRN uses only a subset of the possible DXTn block types.
+  // Default: Set.
+  cCRNCompFlagUseBothBlockTypes = 8,
+
+  // OK to use DXT1A transparent indices to encode black (assumes pixel shader ignores fetched alpha).
+  // Currently only used when writing to .DDS files, .CRN never uses alpha blocks.
+  // Default: Not set.
+  cCRNCompFlagUseTransparentIndicesForBlack = 16,
+
+  // Disables endpoint caching, for more deterministic output.
+  // Currently only used when writing to .DDS files.
+  // Default: Not set.
+  cCRNCompFlagDisableEndpointCaching = 32,
+
+  // If enabled, use the cCRNColorEndpointPaletteSize, etc. params to control the CRN palette sizes. Only useful when writing to .CRN files.
+  // Default: Not set.
+  cCRNCompFlagManualPaletteSizes = 64,
+
+  // If enabled, DXT1A alpha blocks are used to encode single bit transparency.
+  // Default: Not set.
+  cCRNCompFlagDXT1AForTransparency = 128,
+
+  // If enabled, the DXT1 compressor's color distance metric assumes the pixel shader will be converting the fetched RGB results to luma (Y part of YCbCr).
+  // This increases quality when compressing grayscale images, because the compressor can spread the luma error among all three channels (i.e. it can generate blocks
+  // with some chroma present if doing so will ultimately lead to lower luma error).
+  // Only enable on grayscale source images.
+  // Default: Not set.
+  cCRNCompFlagGrayscaleSampling = 256,
+
+  // If enabled, debug information will be output during compression.
+  // Default: Not set.
+  cCRNCompFlagDebugging = 0x80000000,
+
+  cCRNCompFlagForceDWORD = 0xFFFFFFFF
+};
+
+// Controls DXTn quality vs. speed control - only used when compressing to .DDS.
+enum crn_dxt_quality {
+  cCRNDXTQualitySuperFast,
+  cCRNDXTQualityFast,
+  cCRNDXTQualityNormal,
+  cCRNDXTQualityBetter,
+  cCRNDXTQualityUber,
+
+  cCRNDXTQualityTotal,
+
+  cCRNDXTQualityForceDWORD = 0xFFFFFFFF
+};
+
+// Which DXTn compressor to use when compressing to plain (non-clustered) .DDS.
+enum crn_dxt_compressor_type {
+  cCRNDXTCompressorCRN, // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot faster with similar quality to Ericsson's)
+  cCRNDXTCompressorCRNF, // Use crnlib's "fast" DXTc block compressor
+  cCRNDXTCompressorRYG, // Use RYG's DXTc block compressor (low quality, but very fast)
+
+#if CRNLIB_SUPPORT_ATI_COMPRESS
+  cCRNDXTCompressorATI,
+#endif
+
+#if CRNLIB_SUPPORT_SQUISH
+  cCRNDXTCompressorSquish,
+#endif
+
+  cCRNTotalDXTCompressors,
+
+  cCRNDXTCompressorForceDWORD = 0xFFFFFFFF
+};
+
+// Progress callback function.
+// Processing will stop prematurely (and fail) if the callback returns false.
+// phase_index, total_phases - high level progress
+// subphase_index, total_subphases - progress within current phase
+typedef crn_bool (*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr);
+
+// CRN/DDS compression parameters struct.
+struct crn_comp_params {
+  inline crn_comp_params() { clear(); }
+
+  // Clear struct to default parameters.
+  inline void clear() {
+    m_size_of_obj = sizeof(*this);
+    m_file_type = cCRNFileTypeCRN;
+    m_faces = 1;
+    m_width = 0;
+    m_height = 0;
+    m_levels = 1;
+    m_format = cCRNFmtDXT1;
+    m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes;
+
+    for (crn_uint32 f = 0; f < cCRNMaxFaces; f++)
+      for (crn_uint32 l = 0; l < cCRNMaxLevels; l++)
+        m_pImages[f][l] = NULL;
+
+    m_target_bitrate = 0.0f;
+    m_quality_level = cCRNMaxQualityLevel;
+    m_dxt1a_alpha_threshold = 128;
+    m_dxt_quality = cCRNDXTQualityUber;
+    m_dxt_compressor_type = cCRNDXTCompressorCRN;
+    m_alpha_component = 3;
+
+    m_crn_adaptive_tile_color_psnr_derating = 2.0f;
+    m_crn_adaptive_tile_alpha_psnr_derating = 2.0f;
+    m_crn_color_endpoint_palette_size = 0;
+    m_crn_color_selector_palette_size = 0;
+    m_crn_alpha_endpoint_palette_size = 0;
+    m_crn_alpha_selector_palette_size = 0;
+
+    m_num_helper_threads = 0;
+    m_userdata0 = 0;
+    m_userdata1 = 0;
+    m_pProgress_func = NULL;
+    m_pProgress_func_data = NULL;
+  }
+
+  inline bool operator==(const crn_comp_params& rhs) const {
+#define CRNLIB_COMP(x) \
+  do { \
+    if ((x) != (rhs.x)) \
+      return false; \
+  } while (0)
+    CRNLIB_COMP(m_size_of_obj);
+    CRNLIB_COMP(m_file_type);
+    CRNLIB_COMP(m_faces);
+    CRNLIB_COMP(m_width);
+    CRNLIB_COMP(m_height);
+    CRNLIB_COMP(m_levels);
+    CRNLIB_COMP(m_format);
+    CRNLIB_COMP(m_flags);
+    CRNLIB_COMP(m_target_bitrate);
+    CRNLIB_COMP(m_quality_level);
+    CRNLIB_COMP(m_dxt1a_alpha_threshold);
+    CRNLIB_COMP(m_dxt_quality);
+    CRNLIB_COMP(m_dxt_compressor_type);
+    CRNLIB_COMP(m_alpha_component);
+    CRNLIB_COMP(m_crn_adaptive_tile_color_psnr_derating);
+    CRNLIB_COMP(m_crn_adaptive_tile_alpha_psnr_derating);
+    CRNLIB_COMP(m_crn_color_endpoint_palette_size);
+    CRNLIB_COMP(m_crn_color_selector_palette_size);
+    CRNLIB_COMP(m_crn_alpha_endpoint_palette_size);
+    CRNLIB_COMP(m_crn_alpha_selector_palette_size);
+    CRNLIB_COMP(m_num_helper_threads);
+    CRNLIB_COMP(m_userdata0);
+    CRNLIB_COMP(m_userdata1);
+    CRNLIB_COMP(m_pProgress_func);
+    CRNLIB_COMP(m_pProgress_func_data);
+
+    for (crn_uint32 f = 0; f < cCRNMaxFaces; f++)
+      for (crn_uint32 l = 0; l < cCRNMaxLevels; l++)
+        CRNLIB_COMP(m_pImages[f][l]);
+
+#undef CRNLIB_COMP
+    return true;
+  }
+
+  // Returns true if the input parameters are reasonable.
+  inline bool check() const {
+    if ((m_file_type > cCRNFileTypeDDS) ||
+        (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) ||
+        (m_dxt1a_alpha_threshold > 255) ||
+        ((m_faces != 1) && (m_faces != 6)) ||
+        ((m_width < 1) || (m_width > cCRNMaxLevelResolution)) ||
+        ((m_height < 1) || (m_height > cCRNMaxLevelResolution)) ||
+        ((m_levels < 1) || (m_levels > cCRNMaxLevels)) ||
+        ((m_format < cCRNFmtDXT1) || (m_format >= cCRNFmtTotal)) ||
+        ((m_crn_color_endpoint_palette_size) && ((m_crn_color_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_color_endpoint_palette_size > cCRNMaxPaletteSize))) ||
+        ((m_crn_color_selector_palette_size) && ((m_crn_color_selector_palette_size < cCRNMinPaletteSize) || (m_crn_color_selector_palette_size > cCRNMaxPaletteSize))) ||
+        ((m_crn_alpha_endpoint_palette_size) && ((m_crn_alpha_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_endpoint_palette_size > cCRNMaxPaletteSize))) ||
+        ((m_crn_alpha_selector_palette_size) && ((m_crn_alpha_selector_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_selector_palette_size > cCRNMaxPaletteSize))) ||
+        (m_alpha_component > 3) ||
+        (m_num_helper_threads > cCRNMaxHelperThreads) ||
+        (m_dxt_quality > cCRNDXTQualityUber) ||
+        (m_dxt_compressor_type >= cCRNTotalDXTCompressors)) {
+      return false;
+    }
+    return true;
+  }
+
+  // Helper to set/get flags from m_flags member.
+  inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; }
+  inline void set_flag(crn_comp_flags flag, bool val) {
+    m_flags &= ~flag;
+    if (val)
+      m_flags |= flag;
+  }
+
+  crn_uint32 m_size_of_obj;
+
+  crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS.
+
+  crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap)
+  crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK
+  crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK
+  crn_uint32 m_levels; // [1,cCRNMaxLevels] mipmap levels per face
+
+  crn_format m_format; // Output pixel format.
+
+  crn_uint32 m_flags; // see crn_comp_flags enum
+
+  // Array of pointers to 32bpp input images.
+  const crn_uint32* m_pImages[cCRNMaxFaces][cCRNMaxLevels];
+
+  // Target bitrate - if non-zero, the compressor will use an interpolative search to find the
+  // highest quality level that is <= the target bitrate. If it fails to find a bitrate high enough, it'll
+  // try disabling adaptive block sizes (cCRNCompFlagHierarchical flag) and redo the search. This process can be pretty slow.
+  float m_target_bitrate;
+
+  // Desired quality level.
+  // Currently, CRN and DDS quality levels are not compatible with each other from an image quality standpoint.
+  crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel]
+
+  // DXTn compression parameters.
+  crn_uint32 m_dxt1a_alpha_threshold;
+  crn_dxt_quality m_dxt_quality;
+  crn_dxt_compressor_type m_dxt_compressor_type;
+
+  // Alpha channel's component. Defaults to 3.
+  crn_uint32 m_alpha_component;
+
+  // Various low-level CRN specific parameters.
+  float m_crn_adaptive_tile_color_psnr_derating;
+  float m_crn_adaptive_tile_alpha_psnr_derating;
+
+  crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+  crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+
+  crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+  crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+
+  // Number of helper threads to create during compression. 0=no threading.
+  crn_uint32 m_num_helper_threads;
+
+  // CRN userdata0 and userdata1 members, which are written directly to the header of the output file.
+  crn_uint32 m_userdata0;
+  crn_uint32 m_userdata1;
+
+  // User provided progress callback.
+  crn_progress_callback_func m_pProgress_func;
+  void* m_pProgress_func_data;
+};
+
+// Mipmap generator's mode.
+enum crn_mip_mode {
+  cCRNMipModeUseSourceOrGenerateMips, // Use source texture's mipmaps if it has any, otherwise generate new mipmaps
+  cCRNMipModeUseSourceMips, // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps
+  cCRNMipModeGenerateMips, // Always generate new mipmaps
+  cCRNMipModeNoMips, // Output texture has no mipmaps
+
+  cCRNMipModeTotal,
+
+  cCRNModeForceDWORD = 0xFFFFFFFF
+};
+
+const char* crn_get_mip_mode_desc(crn_mip_mode m);
+const char* crn_get_mip_mode_name(crn_mip_mode m);
+
+// Mipmap generator's filter kernel.
+enum crn_mip_filter {
+  cCRNMipFilterBox,
+  cCRNMipFilterTent,
+  cCRNMipFilterLanczos4,
+  cCRNMipFilterMitchell,
+  cCRNMipFilterKaiser, // Kaiser=default mipmap filter
+
+  cCRNMipFilterTotal,
+
+  cCRNMipFilterForceDWORD = 0xFFFFFFFF
+};
+
+const char* crn_get_mip_filter_name(crn_mip_filter f);
+
+// Mipmap generator's scale mode.
+enum crn_scale_mode { + cCRNSMDisabled, + cCRNSMAbsolute, + cCRNSMRelative, + cCRNSMLowerPow2, + cCRNSMNearestPow2, + cCRNSMNextPow2, + + cCRNSMTotal, + + cCRNSMForceDWORD = 0xFFFFFFFF +}; + +const char* crn_get_scale_mode_desc(crn_scale_mode sm); + +// Mipmap generator parameters. +struct crn_mipmap_params { + inline crn_mipmap_params() { clear(); } + + inline void clear() { + m_size_of_obj = sizeof(*this); + m_mode = cCRNMipModeUseSourceOrGenerateMips; + m_filter = cCRNMipFilterKaiser; + m_gamma_filtering = true; + m_gamma = 2.2f; + // Default "blurriness" factor of .9 actually sharpens the output a little. + m_blurriness = .9f; + m_renormalize = false; + m_tiled = false; + m_max_levels = cCRNMaxLevels; + m_min_mip_size = 1; + + m_scale_mode = cCRNSMDisabled; + m_scale_x = 1.0f; + m_scale_y = 1.0f; + + m_window_left = 0; + m_window_top = 0; + m_window_right = 0; + m_window_bottom = 0; + + m_clamp_scale = false; + m_clamp_width = 0; + m_clamp_height = 0; + } + + inline bool check() const { return true; } + + inline bool operator==(const crn_mipmap_params& rhs) const { +#define CRNLIB_COMP(x) \ + do { \ + if ((x) != (rhs.x)) \ + return false; \ + } while (0) + CRNLIB_COMP(m_size_of_obj); + CRNLIB_COMP(m_mode); + CRNLIB_COMP(m_filter); + CRNLIB_COMP(m_gamma_filtering); + CRNLIB_COMP(m_gamma); + CRNLIB_COMP(m_blurriness); + CRNLIB_COMP(m_renormalize); + CRNLIB_COMP(m_tiled); + CRNLIB_COMP(m_max_levels); + CRNLIB_COMP(m_min_mip_size); + CRNLIB_COMP(m_scale_mode); + CRNLIB_COMP(m_scale_x); + CRNLIB_COMP(m_scale_y); + CRNLIB_COMP(m_window_left); + CRNLIB_COMP(m_window_top); + CRNLIB_COMP(m_window_right); + CRNLIB_COMP(m_window_bottom); + CRNLIB_COMP(m_clamp_scale); + CRNLIB_COMP(m_clamp_width); + CRNLIB_COMP(m_clamp_height); + return true; +#undef CRNLIB_COMP + } + crn_uint32 m_size_of_obj; + + crn_mip_mode m_mode; + crn_mip_filter m_filter; + + crn_bool m_gamma_filtering; + float m_gamma; + + float m_blurriness; + + crn_uint32 m_max_levels; + crn_uint32 
m_min_mip_size; + + crn_bool m_renormalize; + crn_bool m_tiled; + + crn_scale_mode m_scale_mode; + float m_scale_x; + float m_scale_y; + + crn_uint32 m_window_left; + crn_uint32 m_window_top; + crn_uint32 m_window_right; + crn_uint32 m_window_bottom; + + crn_bool m_clamp_scale; + crn_uint32 m_clamp_width; + crn_uint32 m_clamp_height; +}; + +// -------- High-level helper function definitions for CDN/DDS compression. + +#ifndef CRNLIB_MIN_ALLOC_ALIGNMENT +#define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 +#endif + +// Function to set an optional user provided memory allocation/reallocation/msize routines. +// By default, crnlib just uses malloc(), free(), etc. for all allocations. +typedef void* (*crn_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); +typedef size_t (*crn_msize_func)(void* p, void* pUser_data); +void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data); + +// Frees memory blocks allocated by crn_compress(), crn_decompress_crn_to_dds(), or crn_decompress_dds_to_images(). +void crn_free_block(void* pBlock); + +// Compresses a 32-bit/pixel texture to either: a regular DX9 DDS file, a "clustered" (or reduced entropy) DX9 DDS file, or a CRN file in memory. +// Input parameters: +// comp_params is the compression parameters struct, defined above. +// compressed_size will be set to the size of the returned memory block containing the output file. +// The returned block must be freed by calling crn_free_block(). +// *pActual_quality_level will be set to the actual quality level used to compress the image. May be NULL. +// *pActual_bitrate will be set to the output file's effective bitrate, possibly taking into account LZMA compression. May be NULL. +// Return value: +// The compressed file data, or NULL on failure. +// compressed_size will be set to the size of the returned memory buffer. 
+// Notes: +// A "regular" DDS file is compressed using normal DXTn compression at the specified DXT quality level. +// A "clustered" DDS file is compressed using clustered DXTn compression to either the target bitrate or the specified integer quality factor. +// The output file is a standard DX9 format DDS file, except the compressor assumes you will be later losslessly compressing the DDS output file using the LZMA algorithm. +// A texture is defined as an array of 1 or 6 "faces" (6 faces=cubemap), where each "face" consists of between [1,cCRNMaxLevels] mipmap levels. +// Mipmap levels are simple 32-bit 2D images with a pitch of width*sizeof(uint32), arranged in the usual raster order (top scanline first). +// The image pixels may be grayscale (YYYX bytes in memory), grayscale/alpha (YYYA in memory), 24-bit (RGBX in memory), or 32-bit (RGBA) colors (where "X"=don't care). +// RGB color data is generally assumed to be in the sRGB colorspace. If not, be sure to clear the "cCRNCompFlagPerceptual" in the crn_comp_params struct! +void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL); + +// Like the above function, except this function can also do things like generate mipmaps, and resize or crop the input texture before compression. +// The actual operations performed are controlled by the crn_mipmap_params struct members. +// Be sure to set the "m_gamma_filtering" member of crn_mipmap_params to false if the input texture is not sRGB. +void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& mip_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL); + +// Transcodes an entire CRN file to DDS using the crn_decomp.h header file library to do most of the heavy lifting. +// The output DDS file's format is guaranteed to be one of the DXTn formats in the crn_format enum. 
+// This is a fast operation, because the CRN format is explicitly designed to be efficiently transcodable to DXTn.
+// For more control over decompression, see the lower-level helper functions in crn_decomp.h, which do not depend at all on crnlib.
+void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_size);
+
+// Decompresses an entire DDS file in any supported format to uncompressed 32-bit/pixel image(s).
+// See the crnlib::pixel_format enum in inc/dds_defs.h for a list of the supported DDS formats.
+// You are responsible for freeing each image block, either by calling crn_free_all_images() or manually calling crn_free_block() on each image pointer.
+struct crn_texture_desc {
+  crn_uint32 m_faces;
+  crn_uint32 m_width;
+  crn_uint32 m_height;
+  crn_uint32 m_levels;
+  crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format
+};
+bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc);
+
+// Frees all images allocated by crn_decompress_dds_to_images().
+void crn_free_all_images(crn_uint32** ppImages, const crn_texture_desc& desc);
+
+// -------- crn_format related helper functions.
+
+// Returns the FOURCC format equivalent to the specified crn_format.
+crn_uint32 crn_get_format_fourcc(crn_format fmt);
+
+// Returns the crn_format's bits per texel.
+crn_uint32 crn_get_format_bits_per_texel(crn_format fmt);
+
+// Returns the crn_format's number of bytes per block.
+crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt);
+
+// Returns the non-swizzled, basic DXTn version of the specified crn_format.
+// This is the format you would supply D3D or OpenGL.
+crn_format crn_get_fundamental_dxt_format(crn_format fmt);
+
+// -------- String helpers.
+
+// Converts a crn_file_type to a string.
+const char* crn_get_file_type_ext(crn_file_type file_type);
+
+// Converts a crn_format to a string.
+const char* crn_get_format_string(crn_format fmt);
+
+// Converts a crn_dxt_quality to a string.
+const char* crn_get_dxt_quality_string(crn_dxt_quality q);
+
+// -------- Low-level DXTn 4x4 block compressor API
+
+// crnlib's DXTn endpoint optimizer actually supports any number of source pixels (i.e. from 1 to thousands, not just 16),
+// but for simplicity this API only supports 4x4 texel blocks.
+typedef void* crn_block_compressor_context_t;
+
+// Create a DXTn block compressor.
+// This function only supports the basic/nonswizzled "fundamental" formats: DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX.
+// Avoid calling this multiple times if you intend on compressing many blocks, because it allocates some memory.
+crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params& params);
+
+// Compresses a block of 16 pixels to the destination DXTn block.
+// pDst_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others).
+// pPixels should be an array of 16 crn_uint32's. Each crn_uint32 must be r,g,b,a (r is always first) in memory.
+void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32* pPixels, void* pDst_block);
+
+// Frees a DXTn block compressor.
+void crn_free_block_compressor(crn_block_compressor_context_t pContext);
+
+// Unpacks a compressed block to pDst_pixels.
+// pSrc_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others).
+// pDst_pixels should be an array of 16 crn_uint32's. Each uint32 will be r,g,b,a (r is always first) in memory.
+// crn_fmt should be one of the "fundamental" formats: DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX.
+// The various swizzled DXT5 formats (such as cCRNFmtDXT5_xGBR, etc.) will be unpacked as if they were plain DXT5.
+// Returns false if the crn_fmt is invalid.
+bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels, crn_format crn_fmt); + +#endif // CRNLIB_H + +//------------------------------------------------------------------------------ +// +// crnlib uses the ZLIB license: +// http://opensource.org/licenses/Zlib +// +// Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source distribution. +// +//------------------------------------------------------------------------------